From 53faa313b7628cd8c9fbb836544cc6482cafb7a4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 5 Oct 2018 08:46:54 -0700 Subject: [PATCH] Switch NCCL to build from open source (version 2.3.5-5) by default. Note to users manually patching ptxas from a later toolkit version: Building NCCL requires the same version of ptxas and nvlink. PiperOrigin-RevId: 215911973 --- configure.py | 17 +- tensorflow/workspace.bzl | 10 +- third_party/gpus/cuda_configure.bzl | 1941 ++++++++++++++------------- third_party/nccl/LICENSE | 225 +--- third_party/nccl/archive.BUILD | 179 +++ third_party/nccl/build_defs.bzl.tpl | 210 +++ third_party/nccl/nccl_archive.BUILD | 68 - third_party/nccl/nccl_configure.bzl | 204 +-- 8 files changed, 1565 insertions(+), 1289 deletions(-) create mode 100644 third_party/nccl/archive.BUILD create mode 100644 third_party/nccl/build_defs.bzl.tpl delete mode 100644 third_party/nccl/nccl_archive.BUILD diff --git a/configure.py b/configure.py index a88fdb35555..65b46229955 100644 --- a/configure.py +++ b/configure.py @@ -35,7 +35,6 @@ except ImportError: _DEFAULT_CUDA_VERSION = '9.0' _DEFAULT_CUDNN_VERSION = '7' -_DEFAULT_NCCL_VERSION = '2.2' _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,7.0' _DEFAULT_CUDA_PATH = '/usr/local/cuda' _DEFAULT_CUDA_PATH_LINUX = '/opt/cuda' @@ -1109,18 +1108,17 @@ def set_tf_nccl_install_path(environ_cp): raise ValueError('Currently NCCL is only supported on Linux platforms.') ask_nccl_version = ( - 'Please specify the NCCL version you want to use. If NCCL %s is not ' - 'installed, then you can use version 1.3 that can be fetched ' - 'automatically but it may have worse performance with multiple GPUs. ' - '[Default is %s]: ') % (_DEFAULT_NCCL_VERSION, _DEFAULT_NCCL_VERSION) + 'Please specify the locally installed NCCL version you want to use. ' + '[Default is to use https://github.com/nvidia/nccl]: ') for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): tf_nccl_version = get_from_env_or_user_or_default( - environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, _DEFAULT_NCCL_VERSION) - tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1) + environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, '') - if tf_nccl_version == '1': - break # No need to get install path, NCCL 1 is a GitHub repo. + if not tf_nccl_version: + break # No need to get install path, building the open source code. + + tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1) # Look with ldconfig first if we can find the library in paths # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding @@ -1232,7 +1230,6 @@ def set_tf_nccl_install_path(environ_cp): environ_cp['TF_NCCL_VERSION'] = tf_nccl_version write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version) - def get_native_cuda_compute_capabilities(environ_cp): """Get native cuda compute capabilities. diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 72f3fd0cf87..8df41f96b8e 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -585,12 +585,12 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "nccl_archive", - build_file = clean_dep("//third_party:nccl/nccl_archive.BUILD"), - sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176", - strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7", + build_file = clean_dep("//third_party:nccl/archive.BUILD"), + sha256 = "19132b5127fa8e02d95a09795866923f04064c8f1e0770b2b42ab551408882a4", + strip_prefix = "nccl-f93fe9bfd94884cec2ba711897222e0df5569a53", urls = [ - "https://mirror.bazel.build/github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz", - "https://github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz", + "https://mirror.bazel.build/github.com/nvidia/nccl/archive/f93fe9bfd94884cec2ba711897222e0df5569a53.tar.gz", + "https://github.com/nvidia/nccl/archive/f93fe9bfd94884cec2ba711897222e0df5569a53.tar.gz", ], ) diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 69f4599c169..831a3067b24 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -126,118 +126,141 @@ load( ) def _get_python_bin(repository_ctx): - """Gets the python bin path.""" - python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH) - if python_bin != None: - return python_bin - python_bin_name = "python.exe" if _is_windows(repository_ctx) else "python" - python_bin_path = repository_ctx.which(python_bin_name) - if python_bin_path != None: - return str(python_bin_path) - auto_configure_fail("Cannot find python in PATH, please make sure " + - "python is installed and add its directory in PATH, or --define " + - "%s='/something/else'.\nPATH=%s" % ( - _PYTHON_BIN_PATH, - repository_ctx.os.environ.get("PATH", ""), - )) + """Gets the python bin path.""" + python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH) + if python_bin != None: + return python_bin + python_bin_name = "python.exe" if _is_windows(repository_ctx) else "python" + python_bin_path = repository_ctx.which(python_bin_name) + if python_bin_path != None: + return str(python_bin_path) + auto_configure_fail( + "Cannot find python in PATH, please make sure " + + "python is installed and add its directory in PATH, or --define " + + "%s='/something/else'.\nPATH=%s" % ( + _PYTHON_BIN_PATH, + repository_ctx.os.environ.get("PATH", ""), + )) + def _get_nvcc_tmp_dir_for_windows(repository_ctx): - """Return the tmp directory for nvcc to generate intermediate source files.""" - escaped_tmp_dir = escape_string( - get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace("\\", "\\\\"), - ) - return escaped_tmp_dir + "\\\\nvcc_inter_files_tmp_dir" + """Return the tmp directory for nvcc to generate intermediate source files.""" + escaped_tmp_dir = escape_string( + get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace( + "\\", "\\\\"),) + return escaped_tmp_dir + "\\\\nvcc_inter_files_tmp_dir" + def _get_msvc_compiler(repository_ctx): - vc_path = find_vc_path(repository_ctx) - return find_msvc_tool(repository_ctx, vc_path, "cl.exe").replace("\\", "/") + vc_path = find_vc_path(repository_ctx) + return find_msvc_tool(repository_ctx, vc_path, "cl.exe").replace("\\", "/") + def _get_win_cuda_defines(repository_ctx): - """Return CROSSTOOL defines for Windows""" - - # If we are not on Windows, return empty vaules for Windows specific fields. - # This ensures the CROSSTOOL file parser is happy. - if not _is_windows(repository_ctx): - return { - "%{msvc_env_tmp}": "", - "%{msvc_env_path}": "", - "%{msvc_env_include}": "", - "%{msvc_env_lib}": "", - "%{msvc_cl_path}": "", - "%{msvc_ml_path}": "", - "%{msvc_link_path}": "", - "%{msvc_lib_path}": "", - "%{cxx_builtin_include_directory}": "", - } - - vc_path = find_vc_path(repository_ctx) - if not vc_path: - auto_configure_fail("Visual C++ build tools not found on your machine." + - "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using") - return {} - - env = setup_vc_env_vars(repository_ctx, vc_path) - escaped_paths = escape_string(env["PATH"]) - escaped_include_paths = escape_string(env["INCLUDE"]) - escaped_lib_paths = escape_string(env["LIB"]) - escaped_tmp_dir = escape_string( - get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace("\\", "\\\\"), - ) - - msvc_cl_path = "windows/msvc_wrapper_for_nvcc.bat" - msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace("\\", "/") - msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace("\\", "/") - msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace("\\", "/") - - # nvcc will generate some temporary source files under %{nvcc_tmp_dir} - # The generated files are guranteed to have unique name, so they can share the same tmp directory - escaped_cxx_include_directories = ["cxx_builtin_include_directory: \"%s\"" % _get_nvcc_tmp_dir_for_windows(repository_ctx)] - for path in escaped_include_paths.split(";"): - if path: - escaped_cxx_include_directories.append("cxx_builtin_include_directory: \"%s\"" % path) + """Return CROSSTOOL defines for Windows""" + # If we are not on Windows, return empty vaules for Windows specific fields. + # This ensures the CROSSTOOL file parser is happy. + if not _is_windows(repository_ctx): return { - "%{msvc_env_tmp}": escaped_tmp_dir, - "%{msvc_env_path}": escaped_paths, - "%{msvc_env_include}": escaped_include_paths, - "%{msvc_env_lib}": escaped_lib_paths, - "%{msvc_cl_path}": msvc_cl_path, - "%{msvc_ml_path}": msvc_ml_path, - "%{msvc_link_path}": msvc_link_path, - "%{msvc_lib_path}": msvc_lib_path, - "%{cxx_builtin_include_directory}": "\n".join(escaped_cxx_include_directories), + "%{msvc_env_tmp}": "", + "%{msvc_env_path}": "", + "%{msvc_env_include}": "", + "%{msvc_env_lib}": "", + "%{msvc_cl_path}": "", + "%{msvc_ml_path}": "", + "%{msvc_link_path}": "", + "%{msvc_lib_path}": "", + "%{cxx_builtin_include_directory}": "", } + vc_path = find_vc_path(repository_ctx) + if not vc_path: + auto_configure_fail( + "Visual C++ build tools not found on your machine." + + "Please check your installation following https://docs.bazel.build/versions/master/windows.html#using" + ) + return {} + + env = setup_vc_env_vars(repository_ctx, vc_path) + escaped_paths = escape_string(env["PATH"]) + escaped_include_paths = escape_string(env["INCLUDE"]) + escaped_lib_paths = escape_string(env["LIB"]) + escaped_tmp_dir = escape_string( + get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace( + "\\", "\\\\"),) + + msvc_cl_path = "windows/msvc_wrapper_for_nvcc.bat" + msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace( + "\\", "/") + msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace( + "\\", "/") + msvc_lib_path = find_msvc_tool(repository_ctx, vc_path, "lib.exe").replace( + "\\", "/") + + # nvcc will generate some temporary source files under %{nvcc_tmp_dir} + # The generated files are guranteed to have unique name, so they can share the same tmp directory + escaped_cxx_include_directories = [ + "cxx_builtin_include_directory: \"%s\"" % + _get_nvcc_tmp_dir_for_windows(repository_ctx) + ] + for path in escaped_include_paths.split(";"): + if path: + escaped_cxx_include_directories.append( + "cxx_builtin_include_directory: \"%s\"" % path) + + return { + "%{msvc_env_tmp}": + escaped_tmp_dir, + "%{msvc_env_path}": + escaped_paths, + "%{msvc_env_include}": + escaped_include_paths, + "%{msvc_env_lib}": + escaped_lib_paths, + "%{msvc_cl_path}": + msvc_cl_path, + "%{msvc_ml_path}": + msvc_ml_path, + "%{msvc_link_path}": + msvc_link_path, + "%{msvc_lib_path}": + msvc_lib_path, + "%{cxx_builtin_include_directory}": + "\n".join(escaped_cxx_include_directories), + } + # TODO(dzc): Once these functions have been factored out of Bazel's # cc_configure.bzl, load them from @bazel_tools instead. # BEGIN cc_configure common functions. def find_cc(repository_ctx): - """Find the C++ compiler.""" - if _is_windows(repository_ctx): - return _get_msvc_compiler(repository_ctx) + """Find the C++ compiler.""" + if _is_windows(repository_ctx): + return _get_msvc_compiler(repository_ctx) - if _use_cuda_clang(repository_ctx): - target_cc_name = "clang" - cc_path_envvar = _CLANG_CUDA_COMPILER_PATH - if _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG): - return "extra_tools/bin/clang" - else: - target_cc_name = "gcc" - cc_path_envvar = _GCC_HOST_COMPILER_PATH - cc_name = target_cc_name + if _use_cuda_clang(repository_ctx): + target_cc_name = "clang" + cc_path_envvar = _CLANG_CUDA_COMPILER_PATH + if _flag_enabled(repository_ctx, _TF_DOWNLOAD_CLANG): + return "extra_tools/bin/clang" + else: + target_cc_name = "gcc" + cc_path_envvar = _GCC_HOST_COMPILER_PATH + cc_name = target_cc_name + + if cc_path_envvar in repository_ctx.os.environ: + cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip() + if cc_name_from_env: + cc_name = cc_name_from_env + if cc_name.startswith("/"): + # Absolute path, maybe we should make this supported by our which function. + return cc_name + cc = repository_ctx.which(cc_name) + if cc == None: + fail(("Cannot find {}, either correct your path or set the {}" + + " environment variable").format(target_cc_name, cc_path_envvar)) + return cc - if cc_path_envvar in repository_ctx.os.environ: - cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip() - if cc_name_from_env: - cc_name = cc_name_from_env - if cc_name.startswith("/"): - # Absolute path, maybe we should make this supported by our which function. - return cc_name - cc = repository_ctx.which(cc_name) - if cc == None: - fail(("Cannot find {}, either correct your path or set the {}" + - " environment variable").format(target_cc_name, cc_path_envvar)) - return cc _INC_DIR_MARKER_BEGIN = "#include <...>" @@ -246,80 +269,82 @@ _OSX_FRAMEWORK_SUFFIX = " (framework directory)" _OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX) def _cxx_inc_convert(path): - """Convert path returned by cc -E xc++ in a complete path.""" - path = path.strip() - if path.endswith(_OSX_FRAMEWORK_SUFFIX): - path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip() - return path + """Convert path returned by cc -E xc++ in a complete path.""" + path = path.strip() + if path.endswith(_OSX_FRAMEWORK_SUFFIX): + path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip() + return path + def _normalize_include_path(repository_ctx, path): - """Normalizes include paths before writing them to the crosstool. + """Normalizes include paths before writing them to the crosstool. If path points inside the 'crosstool' folder of the repository, a relative path is returned. If path points outside the 'crosstool' folder, an absolute path is returned. """ - path = str(repository_ctx.path(path)) - crosstool_folder = str(repository_ctx.path(".").get_child("crosstool")) + path = str(repository_ctx.path(path)) + crosstool_folder = str(repository_ctx.path(".").get_child("crosstool")) + + if path.startswith(crosstool_folder): + # We drop the path to "$REPO/crosstool" and a trailing path separator. + return path[len(crosstool_folder) + 1:] + return path - if path.startswith(crosstool_folder): - # We drop the path to "$REPO/crosstool" and a trailing path separator. - return path[len(crosstool_folder) + 1:] - return path def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp): - """Compute the list of default C or C++ include directories.""" - if lang_is_cpp: - lang = "c++" - else: - lang = "c" - result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"]) - index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN) - if index1 == -1: - return [] - index1 = result.stderr.find("\n", index1) - if index1 == -1: - return [] - index2 = result.stderr.rfind("\n ") - if index2 == -1 or index2 < index1: - return [] - index2 = result.stderr.find("\n", index2 + 1) - if index2 == -1: - inc_dirs = result.stderr[index1 + 1:] - else: - inc_dirs = result.stderr[index1 + 1:index2].strip() + """Compute the list of default C or C++ include directories.""" + if lang_is_cpp: + lang = "c++" + else: + lang = "c" + result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"]) + index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN) + if index1 == -1: + return [] + index1 = result.stderr.find("\n", index1) + if index1 == -1: + return [] + index2 = result.stderr.rfind("\n ") + if index2 == -1 or index2 < index1: + return [] + index2 = result.stderr.find("\n", index2 + 1) + if index2 == -1: + inc_dirs = result.stderr[index1 + 1:] + else: + inc_dirs = result.stderr[index1 + 1:index2].strip() + + return [ + _normalize_include_path(repository_ctx, _cxx_inc_convert(p)) + for p in inc_dirs.split("\n") + ] - return [ - _normalize_include_path(repository_ctx, _cxx_inc_convert(p)) - for p in inc_dirs.split("\n") - ] def get_cxx_inc_directories(repository_ctx, cc): - """Compute the list of default C and C++ include directories.""" + """Compute the list of default C and C++ include directories.""" - # For some reason `clang -xc` sometimes returns include paths that are - # different from the ones from `clang -xc++`. (Symlink and a dir) - # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists - includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True) - includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False) + # For some reason `clang -xc` sometimes returns include paths that are + # different from the ones from `clang -xc++`. (Symlink and a dir) + # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists + includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True) + includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False) + + includes_cpp_set = depset(includes_cpp) + return includes_cpp + [ + inc for inc in includes_c if inc not in includes_cpp_set + ] - includes_cpp_set = depset(includes_cpp) - return includes_cpp + [ - inc - for inc in includes_c - if inc not in includes_cpp_set - ] def auto_configure_fail(msg): - """Output failure message when cuda configuration fails.""" - red = "\033[0;31m" - no_color = "\033[0m" - fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg)) + """Output failure message when cuda configuration fails.""" + red = "\033[0;31m" + no_color = "\033[0m" + fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg)) # END cc_configure common functions (see TODO above). def _host_compiler_includes(repository_ctx, cc): - """Generates the cxx_builtin_include_directory entries for gcc inc dirs. + """Generates the cxx_builtin_include_directory entries for gcc inc dirs. Args: repository_ctx: The repository context. @@ -330,14 +355,15 @@ def _host_compiler_includes(repository_ctx, cc): host compiler include directories, which can be added to the CROSSTOOL file. """ - inc_dirs = get_cxx_inc_directories(repository_ctx, cc) - inc_entries = [] - for inc_dir in inc_dirs: - inc_entries.append(" cxx_builtin_include_directory: \"%s\"" % inc_dir) - return "\n".join(inc_entries) + inc_dirs = get_cxx_inc_directories(repository_ctx, cc) + inc_entries = [] + for inc_dir in inc_dirs: + inc_entries.append(" cxx_builtin_include_directory: \"%s\"" % inc_dir) + return "\n".join(inc_entries) + def _cuda_include_path(repository_ctx, cuda_config): - """Generates the cxx_builtin_include_directory entries for cuda inc dirs. + """Generates the cxx_builtin_include_directory entries for cuda inc dirs. Args: repository_ctx: The repository context. @@ -348,39 +374,41 @@ def _cuda_include_path(repository_ctx, cuda_config): host compiler include directories, which can be added to the CROSSTOOL file. """ - nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % - ( - cuda_config.cuda_toolkit_path, - ".exe" if cuda_config.cpu_value == "Windows" else "", - )) - result = repository_ctx.execute([ - nvcc_path, - "-v", - "/dev/null", - "-o", - "/dev/null", - ]) - target_dir = "" - for one_line in result.stderr.splitlines(): - if one_line.startswith("#$ _TARGET_DIR_="): - target_dir = (cuda_config.cuda_toolkit_path + "/" + - one_line.replace("#$ _TARGET_DIR_=", "") + "/include") - inc_entries = [] - if target_dir != "": - inc_entries.append(" cxx_builtin_include_directory: \"%s\"" % target_dir) - default_include = cuda_config.cuda_toolkit_path + "/include" - inc_entries.append(" cxx_builtin_include_directory: \"%s\"" % - default_include) - return "\n".join(inc_entries) + nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % ( + cuda_config.cuda_toolkit_path, + ".exe" if cuda_config.cpu_value == "Windows" else "", + )) + result = repository_ctx.execute([ + nvcc_path, + "-v", + "/dev/null", + "-o", + "/dev/null", + ]) + target_dir = "" + for one_line in result.stderr.splitlines(): + if one_line.startswith("#$ _TARGET_DIR_="): + target_dir = ( + cuda_config.cuda_toolkit_path + "/" + one_line.replace( + "#$ _TARGET_DIR_=", "") + "/include") + inc_entries = [] + if target_dir != "": + inc_entries.append(" cxx_builtin_include_directory: \"%s\"" % target_dir) + default_include = cuda_config.cuda_toolkit_path + "/include" + inc_entries.append( + " cxx_builtin_include_directory: \"%s\"" % default_include) + return "\n".join(inc_entries) + def _enable_cuda(repository_ctx): - if "TF_NEED_CUDA" in repository_ctx.os.environ: - enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip() - return enable_cuda == "1" - return False + if "TF_NEED_CUDA" in repository_ctx.os.environ: + enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip() + return enable_cuda == "1" + return False -def _cuda_toolkit_path(repository_ctx): - """Finds the cuda toolkit directory. + +def cuda_toolkit_path(repository_ctx): + """Finds the cuda toolkit directory. Args: repository_ctx: The repository context. @@ -388,27 +416,31 @@ def _cuda_toolkit_path(repository_ctx): Returns: A speculative real path of the cuda toolkit install directory. """ - cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH - if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ: - cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip() - if not repository_ctx.path(cuda_toolkit_path).exists: - auto_configure_fail("Cannot find cuda toolkit path.") - return str(repository_ctx.path(cuda_toolkit_path).realpath) + cuda_toolkit_path = _DEFAULT_CUDA_TOOLKIT_PATH + if _CUDA_TOOLKIT_PATH in repository_ctx.os.environ: + cuda_toolkit_path = repository_ctx.os.environ[_CUDA_TOOLKIT_PATH].strip() + if not repository_ctx.path(cuda_toolkit_path).exists: + auto_configure_fail("Cannot find cuda toolkit path.") + return str(repository_ctx.path(cuda_toolkit_path).realpath) + def _cudnn_install_basedir(repository_ctx): - """Finds the cudnn install directory.""" - cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH - if _CUDNN_INSTALL_PATH in repository_ctx.os.environ: - cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip() - if not repository_ctx.path(cudnn_install_path).exists: - auto_configure_fail("Cannot find cudnn install path.") - return cudnn_install_path + """Finds the cudnn install directory.""" + cudnn_install_path = _DEFAULT_CUDNN_INSTALL_PATH + if _CUDNN_INSTALL_PATH in repository_ctx.os.environ: + cudnn_install_path = repository_ctx.os.environ[_CUDNN_INSTALL_PATH].strip() + if not repository_ctx.path(cudnn_install_path).exists: + auto_configure_fail("Cannot find cudnn install path.") + return cudnn_install_path + def matches_version(environ_version, detected_version): - """Checks whether the user-specified version matches the detected version. + """Checks whether the user-specified version matches the detected version. - This function performs a weak matching so that if the user specifies only the - major or major and minor versions, the versions are still considered matching + This function performs a weak matching so that if the user specifies only + the + major or major and minor versions, the versions are still considered + matching if the version parts match. To illustrate: environ_version detected_version result @@ -424,25 +456,25 @@ def matches_version(environ_version, detected_version): variables. detected_version: The version autodetected from the CUDA installation on the system. - Returns: True if user-specified version matches detected version and False otherwise. - """ - environ_version_parts = environ_version.split(".") - detected_version_parts = detected_version.split(".") - if len(detected_version_parts) < len(environ_version_parts): - return False - for i, part in enumerate(detected_version_parts): - if i >= len(environ_version_parts): - break - if part != environ_version_parts[i]: - return False - return True + """ + environ_version_parts = environ_version.split(".") + detected_version_parts = detected_version.split(".") + if len(detected_version_parts) < len(environ_version_parts): + return False + for i, part in enumerate(detected_version_parts): + if i >= len(environ_version_parts): + break + if part != environ_version_parts[i]: + return False + return True + _NVCC_VERSION_PREFIX = "Cuda compilation tools, release " def _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value): - """Detects the version of CUDA installed on the system. + """Detects the version of CUDA installed on the system. Args: repository_ctx: The repository context. @@ -452,64 +484,61 @@ def _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value): String containing the version of CUDA. """ - # Run nvcc --version and find the line containing the CUDA version. - nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % - ( - cuda_toolkit_path, - ".exe" if cpu_value == "Windows" else "", - )) - if not nvcc_path.exists: - auto_configure_fail("Cannot find nvcc at %s" % str(nvcc_path)) - result = repository_ctx.execute([str(nvcc_path), "--version"]) - if result.stderr: - auto_configure_fail("Error running nvcc --version: %s" % result.stderr) - lines = result.stdout.splitlines() - version_line = lines[len(lines) - 1] - if version_line.find(_NVCC_VERSION_PREFIX) == -1: - auto_configure_fail( - "Could not parse CUDA version from nvcc --version. Got: %s" % - result.stdout, - ) + # Run nvcc --version and find the line containing the CUDA version. + nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % ( + cuda_toolkit_path, + ".exe" if cpu_value == "Windows" else "", + )) + if not nvcc_path.exists: + auto_configure_fail("Cannot find nvcc at %s" % str(nvcc_path)) + result = repository_ctx.execute([str(nvcc_path), "--version"]) + if result.stderr: + auto_configure_fail("Error running nvcc --version: %s" % result.stderr) + lines = result.stdout.splitlines() + version_line = lines[len(lines) - 1] + if version_line.find(_NVCC_VERSION_PREFIX) == -1: + auto_configure_fail( + "Could not parse CUDA version from nvcc --version. Got: %s" % + result.stdout,) - # Parse the CUDA version from the line containing the CUDA version. - prefix_removed = version_line.replace(_NVCC_VERSION_PREFIX, "") - parts = prefix_removed.split(",") - if len(parts) != 2 or len(parts[0]) < 2: - auto_configure_fail( - "Could not parse CUDA version from nvcc --version. Got: %s" % - result.stdout, - ) - full_version = parts[1].strip() - if full_version.startswith("V"): - full_version = full_version[1:] + # Parse the CUDA version from the line containing the CUDA version. + prefix_removed = version_line.replace(_NVCC_VERSION_PREFIX, "") + parts = prefix_removed.split(",") + if len(parts) != 2 or len(parts[0]) < 2: + auto_configure_fail( + "Could not parse CUDA version from nvcc --version. Got: %s" % + result.stdout,) + full_version = parts[1].strip() + if full_version.startswith("V"): + full_version = full_version[1:] - # Check whether TF_CUDA_VERSION was set by the user and fail if it does not - # match the detected version. - environ_version = "" - if _TF_CUDA_VERSION in repository_ctx.os.environ: - environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip() - if environ_version and not matches_version(environ_version, full_version): - auto_configure_fail( - ("CUDA version detected from nvcc (%s) does not match " + - "TF_CUDA_VERSION (%s)") % (full_version, environ_version), - ) + # Check whether TF_CUDA_VERSION was set by the user and fail if it does not + # match the detected version. + environ_version = "" + if _TF_CUDA_VERSION in repository_ctx.os.environ: + environ_version = repository_ctx.os.environ[_TF_CUDA_VERSION].strip() + if environ_version and not matches_version(environ_version, full_version): + auto_configure_fail( + ("CUDA version detected from nvcc (%s) does not match " + + "TF_CUDA_VERSION (%s)") % (full_version, environ_version),) + + # We only use the version consisting of the major and minor version numbers. + version_parts = full_version.split(".") + if len(version_parts) < 2: + auto_configure_fail("CUDA version detected from nvcc (%s) is incomplete.") + if cpu_value == "Windows": + version = "64_%s%s" % (version_parts[0], version_parts[1]) + else: + version = "%s.%s" % (version_parts[0], version_parts[1]) + return version - # We only use the version consisting of the major and minor version numbers. - version_parts = full_version.split(".") - if len(version_parts) < 2: - auto_configure_fail("CUDA version detected from nvcc (%s) is incomplete.") - if cpu_value == "Windows": - version = "64_%s%s" % (version_parts[0], version_parts[1]) - else: - version = "%s.%s" % (version_parts[0], version_parts[1]) - return version _DEFINE_CUDNN_MAJOR = "#define CUDNN_MAJOR" _DEFINE_CUDNN_MINOR = "#define CUDNN_MINOR" _DEFINE_CUDNN_PATCHLEVEL = "#define CUDNN_PATCHLEVEL" def find_cuda_define(repository_ctx, header_dir, header_file, define): - """Returns the value of a #define in a header file. + """Returns the value of a #define in a header file. Greps through a header file and returns the value of the specified #define. If the #define is not found, then raise an error. @@ -524,52 +553,52 @@ def find_cuda_define(repository_ctx, header_dir, header_file, define): The value of the #define found in the header. """ - # Confirm location of the header and grep for the line defining the macro. - h_path = repository_ctx.path("%s/%s" % (header_dir, header_file)) - if not h_path.exists: - auto_configure_fail("Cannot find %s at %s" % (header_file, str(h_path))) - result = repository_ctx.execute( - # Grep one more lines as some #defines are splitted into two lines. - ["grep", "--color=never", "-A1", "-E", define, str(h_path)], - ) - if result.stderr: - auto_configure_fail("Error reading %s: %s" % (str(h_path), result.stderr)) + # Confirm location of the header and grep for the line defining the macro. + h_path = repository_ctx.path("%s/%s" % (header_dir, header_file)) + if not h_path.exists: + auto_configure_fail("Cannot find %s at %s" % (header_file, str(h_path))) + result = repository_ctx.execute( + # Grep one more lines as some #defines are splitted into two lines. + ["grep", "--color=never", "-A1", "-E", define, + str(h_path)],) + if result.stderr: + auto_configure_fail("Error reading %s: %s" % (str(h_path), result.stderr)) - # Parse the version from the line defining the macro. - if result.stdout.find(define) == -1: - auto_configure_fail("Cannot find line containing '%s' in %s" % - (define, h_path)) + # Parse the version from the line defining the macro. + if result.stdout.find(define) == -1: + auto_configure_fail( + "Cannot find line containing '%s' in %s" % (define, h_path)) - # Split results to lines - lines = result.stdout.split("\n") - num_lines = len(lines) - for l in range(num_lines): - line = lines[l] - if define in line: # Find the line with define - version = line - if l != num_lines - 1 and line[-1] == "\\": # Add next line, if multiline - version = version[:-1] + lines[l + 1] - break + # Split results to lines + lines = result.stdout.split("\n") + num_lines = len(lines) + for l in range(num_lines): + line = lines[l] + if define in line: # Find the line with define + version = line + if l != num_lines - 1 and line[-1] == "\\": # Add next line, if multiline + version = version[:-1] + lines[l + 1] + break - # Remove any comments - version = version.split("//")[0] + # Remove any comments + version = version.split("//")[0] - # Remove define name - version = version.replace(define, "").strip() + # Remove define name + version = version.replace(define, "").strip() + + # Remove the code after the version number. + version_end = version.find(" ") + if version_end != -1: + if version_end == 0: + auto_configure_fail( + "Cannot extract the version from line containing '%s' in %s" % + (define, str(h_path)),) + version = version[:version_end].strip() + return version - # Remove the code after the version number. - version_end = version.find(" ") - if version_end != -1: - if version_end == 0: - auto_configure_fail( - "Cannot extract the version from line containing '%s' in %s" % - (define, str(h_path)), - ) - version = version[:version_end].strip() - return version def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value): - """Detects the version of cuDNN installed on the system. + """Detects the version of cuDNN installed on the system. Args: repository_ctx: The repository context. @@ -579,68 +608,68 @@ def _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value): Returns: A string containing the version of cuDNN. """ - cudnn_header_dir = _find_cudnn_header_dir( - repository_ctx, - cudnn_install_basedir, - ) - major_version = find_cuda_define( - repository_ctx, - cudnn_header_dir, - "cudnn.h", - _DEFINE_CUDNN_MAJOR, - ) - minor_version = find_cuda_define( - repository_ctx, - cudnn_header_dir, - "cudnn.h", - _DEFINE_CUDNN_MINOR, - ) - patch_version = find_cuda_define( - repository_ctx, - cudnn_header_dir, - "cudnn.h", - _DEFINE_CUDNN_PATCHLEVEL, - ) - full_version = "%s.%s.%s" % (major_version, minor_version, patch_version) + cudnn_header_dir = _find_cudnn_header_dir( + repository_ctx, + cudnn_install_basedir, + ) + major_version = find_cuda_define( + repository_ctx, + cudnn_header_dir, + "cudnn.h", + _DEFINE_CUDNN_MAJOR, + ) + minor_version = find_cuda_define( + repository_ctx, + cudnn_header_dir, + "cudnn.h", + _DEFINE_CUDNN_MINOR, + ) + patch_version = find_cuda_define( + repository_ctx, + cudnn_header_dir, + "cudnn.h", + _DEFINE_CUDNN_PATCHLEVEL, + ) + full_version = "%s.%s.%s" % (major_version, minor_version, patch_version) - # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not - # match the detected version. - environ_version = "" - if _TF_CUDNN_VERSION in repository_ctx.os.environ: - environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip() - if environ_version and not matches_version(environ_version, full_version): - cudnn_h_path = repository_ctx.path("%s/include/cudnn.h" % - cudnn_install_basedir) - auto_configure_fail( - ("cuDNN version detected from %s (%s) does not match " + - "TF_CUDNN_VERSION (%s)") % - (str(cudnn_h_path), full_version, environ_version), - ) + # Check whether TF_CUDNN_VERSION was set by the user and fail if it does not + # match the detected version. + environ_version = "" + if _TF_CUDNN_VERSION in repository_ctx.os.environ: + environ_version = repository_ctx.os.environ[_TF_CUDNN_VERSION].strip() + if environ_version and not matches_version(environ_version, full_version): + cudnn_h_path = repository_ctx.path( + "%s/include/cudnn.h" % cudnn_install_basedir) + auto_configure_fail(("cuDNN version detected from %s (%s) does not match " + + "TF_CUDNN_VERSION (%s)") % + (str(cudnn_h_path), full_version, environ_version),) - # We only use the major version since we use the libcudnn libraries that are - # only versioned with the major version (e.g. libcudnn.so.5). - version = major_version - if cpu_value == "Windows": - version = "64_" + version - return version + # We only use the major version since we use the libcudnn libraries that are + # only versioned with the major version (e.g. libcudnn.so.5). + version = major_version + if cpu_value == "Windows": + version = "64_" + version + return version + + +def compute_capabilities(repository_ctx): + """Returns a list of strings representing cuda compute capabilities.""" + if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ: + return _DEFAULT_CUDA_COMPUTE_CAPABILITIES + capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES] + capabilities = capabilities_str.split(",") + for capability in capabilities: + # Workaround for Skylark's lack of support for regex. This check should + # be equivalent to checking: + # if re.match("[0-9]+.[0-9]+", capability) == None: + parts = capability.split(".") + if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit(): + auto_configure_fail("Invalid compute capability: %s" % capability) + return capabilities -def _compute_capabilities(repository_ctx): - """Returns a list of strings representing cuda compute capabilities.""" - if _TF_CUDA_COMPUTE_CAPABILITIES not in repository_ctx.os.environ: - return _DEFAULT_CUDA_COMPUTE_CAPABILITIES - capabilities_str = repository_ctx.os.environ[_TF_CUDA_COMPUTE_CAPABILITIES] - capabilities = capabilities_str.split(",") - for capability in capabilities: - # Workaround for Skylark's lack of support for regex. This check should - # be equivalent to checking: - # if re.match("[0-9]+.[0-9]+", capability) == None: - parts = capability.split(".") - if len(parts) != 2 or not parts[0].isdigit() or not parts[1].isdigit(): - auto_configure_fail("Invalid compute capability: %s" % capability) - return capabilities def get_cpu_value(repository_ctx): - """Returns the name of the host operating system. + """Returns the name of the host operating system. Args: repository_ctx: The repository context. @@ -648,20 +677,22 @@ def get_cpu_value(repository_ctx): Returns: A string containing the name of the host operating system. """ - os_name = repository_ctx.os.name.lower() - if os_name.startswith("mac os"): - return "Darwin" - if os_name.find("windows") != -1: - return "Windows" - result = repository_ctx.execute(["uname", "-s"]) - return result.stdout.strip() + os_name = repository_ctx.os.name.lower() + if os_name.startswith("mac os"): + return "Darwin" + if os_name.find("windows") != -1: + return "Windows" + result = repository_ctx.execute(["uname", "-s"]) + return result.stdout.strip() + def _is_windows(repository_ctx): - """Returns true if the host operating system is windows.""" - return get_cpu_value(repository_ctx) == "Windows" + """Returns true if the host operating system is windows.""" + return get_cpu_value(repository_ctx) == "Windows" + def _lib_name(lib, cpu_value, version = "", static = False): - """Constructs the platform-specific name of a library. + """Constructs the platform-specific name of a library. Args: lib: The name of the library, such as "cudart" @@ -672,23 +703,24 @@ def _lib_name(lib, cpu_value, version = "", static = False): Returns: The platform-specific name of the library. """ - if cpu_value in ("Linux", "FreeBSD"): - if static: - return "lib%s.a" % lib - else: - if version: - version = ".%s" % version - return "lib%s.so%s" % (lib, version) - elif cpu_value == "Windows": - return "%s.lib" % lib - elif cpu_value == "Darwin": - if static: - return "lib%s.a" % lib - elif version: - version = ".%s" % version - return "lib%s%s.dylib" % (lib, version) + if cpu_value in ("Linux", "FreeBSD"): + if static: + return "lib%s.a" % lib else: - auto_configure_fail("Invalid cpu_value: %s" % cpu_value) + if version: + version = ".%s" % version + return "lib%s.so%s" % (lib, version) + elif cpu_value == "Windows": + return "%s.lib" % lib + elif cpu_value == "Darwin": + if static: + return "lib%s.a" % lib + elif version: + version = ".%s" % version + return "lib%s%s.dylib" % (lib, version) + else: + auto_configure_fail("Invalid cpu_value: %s" % cpu_value) + def _find_cuda_lib( lib, @@ -697,7 +729,7 @@ def _find_cuda_lib( basedir, version = "", static = False): - """Finds the given CUDA or cuDNN library on the system. + """Finds the given CUDA or cuDNN library on the system. Args: lib: The name of the library, such as "cudart" @@ -712,15 +744,16 @@ def _find_cuda_lib( file_name: The basename of the library found on the system. path: The full path to the library. """ - file_name = _lib_name(lib, cpu_value, version, static) - for relative_path in CUDA_LIB_PATHS: - path = repository_ctx.path("%s/%s%s" % (basedir, relative_path, file_name)) - if path.exists: - return struct(file_name = file_name, path = str(path.realpath)) - auto_configure_fail("Cannot find cuda library %s" % file_name) + file_name = _lib_name(lib, cpu_value, version, static) + for relative_path in CUDA_LIB_PATHS: + path = repository_ctx.path("%s/%s%s" % (basedir, relative_path, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + auto_configure_fail("Cannot find cuda library %s" % file_name) + def _find_cupti_header_dir(repository_ctx, cuda_config): - """Returns the path to the directory containing cupti.h + """Returns the path to the directory containing cupti.h On most systems, the cupti library is not installed in the same directory as the other CUDA libraries but rather in a special extras/CUPTI directory. @@ -732,14 +765,17 @@ def _find_cupti_header_dir(repository_ctx, cuda_config): Returns: The path of the directory containing the cupti header. """ - cuda_toolkit_path = cuda_config.cuda_toolkit_path - for relative_path in CUPTI_HEADER_PATHS: - if repository_ctx.path("%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists: - return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] - auto_configure_fail("Cannot find cupti.h under %s" % ", ".join([cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS])) + cuda_toolkit_path = cuda_config.cuda_toolkit_path + for relative_path in CUPTI_HEADER_PATHS: + if repository_ctx.path( + "%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists: + return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] + auto_configure_fail("Cannot find cupti.h under %s" % ", ".join( + [cuda_toolkit_path + "/" + s for s in CUPTI_HEADER_PATHS])) + def _find_cupti_lib(repository_ctx, cuda_config): - """Finds the cupti library on the system. + """Finds the cupti library on the system. On most systems, the cupti library is not installed in the same directory as the other CUDA libraries but rather in a special extras/CUPTI directory. @@ -753,23 +789,23 @@ def _find_cupti_lib(repository_ctx, cuda_config): file_name: The basename of the library found on the system. path: The full path to the library. """ - file_name = _lib_name( - "cupti", - cuda_config.cpu_value, - cuda_config.cuda_version, - ) - cuda_toolkit_path = cuda_config.cuda_toolkit_path - for relative_path in CUPTI_LIB_PATHS: - path = repository_ctx.path( - "%s/%s%s" % (cuda_toolkit_path, relative_path, file_name), - ) - if path.exists: - return struct(file_name = file_name, path = str(path.realpath)) + file_name = _lib_name( + "cupti", + cuda_config.cpu_value, + cuda_config.cuda_version, + ) + cuda_toolkit_path = cuda_config.cuda_toolkit_path + for relative_path in CUPTI_LIB_PATHS: + path = repository_ctx.path( + "%s/%s%s" % (cuda_toolkit_path, relative_path, file_name),) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + + auto_configure_fail("Cannot find cupti library %s" % file_name) - auto_configure_fail("Cannot find cupti library %s" % file_name) def _find_libs(repository_ctx, cuda_config): - """Returns the CUDA and cuDNN libraries on the system. + """Returns the CUDA and cuDNN libraries on the system. Args: repository_ctx: The repository context. @@ -778,64 +814,75 @@ def _find_libs(repository_ctx, cuda_config): Returns: Map of library names to structs of filename and path. """ - cpu_value = cuda_config.cpu_value - return { - "cuda": _find_cuda_lib("cuda", repository_ctx, cpu_value, cuda_config.cuda_toolkit_path), - "cudart": _find_cuda_lib( - "cudart", - repository_ctx, - cpu_value, - cuda_config.cuda_toolkit_path, - cuda_config.cuda_version, - ), - "cudart_static": _find_cuda_lib( - "cudart_static", - repository_ctx, - cpu_value, - cuda_config.cuda_toolkit_path, - cuda_config.cuda_version, - static = True, - ), - "cublas": _find_cuda_lib( - "cublas", - repository_ctx, - cpu_value, - cuda_config.cuda_toolkit_path, - cuda_config.cuda_version, - ), - "cusolver": _find_cuda_lib( - "cusolver", - repository_ctx, - cpu_value, - cuda_config.cuda_toolkit_path, - cuda_config.cuda_version, - ), - "curand": _find_cuda_lib( - "curand", - repository_ctx, - cpu_value, - cuda_config.cuda_toolkit_path, - cuda_config.cuda_version, - ), - "cufft": _find_cuda_lib( - "cufft", - repository_ctx, - cpu_value, - cuda_config.cuda_toolkit_path, - cuda_config.cuda_version, - ), - "cudnn": _find_cuda_lib( - "cudnn", - repository_ctx, - cpu_value, - cuda_config.cudnn_install_basedir, - cuda_config.cudnn_version, - ), - "cupti": _find_cupti_lib(repository_ctx, cuda_config), - } + cpu_value = cuda_config.cpu_value + return { + "cuda": + _find_cuda_lib("cuda", repository_ctx, cpu_value, + cuda_config.cuda_toolkit_path), + "cudart": + _find_cuda_lib( + "cudart", + repository_ctx, + cpu_value, + cuda_config.cuda_toolkit_path, + cuda_config.cuda_version, + ), + "cudart_static": + _find_cuda_lib( + "cudart_static", + repository_ctx, + cpu_value, + cuda_config.cuda_toolkit_path, + cuda_config.cuda_version, + static=True, + ), + "cublas": + _find_cuda_lib( + "cublas", + repository_ctx, + cpu_value, + cuda_config.cuda_toolkit_path, + cuda_config.cuda_version, + ), + "cusolver": + _find_cuda_lib( + "cusolver", + repository_ctx, + cpu_value, + cuda_config.cuda_toolkit_path, + cuda_config.cuda_version, + ), + "curand": + _find_cuda_lib( + "curand", + repository_ctx, + cpu_value, + cuda_config.cuda_toolkit_path, + cuda_config.cuda_version, + ), + "cufft": + _find_cuda_lib( + "cufft", + repository_ctx, + cpu_value, + cuda_config.cuda_toolkit_path, + cuda_config.cuda_version, + ), + "cudnn": + _find_cuda_lib( + "cudnn", + repository_ctx, + cpu_value, + cuda_config.cudnn_install_basedir, + cuda_config.cudnn_version, + ), + "cupti": + _find_cupti_lib(repository_ctx, cuda_config), + } + def _find_cuda_include_path(repository_ctx, cuda_config): - """Returns the path to the directory containing cuda.h + """Returns the path to the directory containing cuda.h Args: repository_ctx: The repository context. @@ -844,14 +891,16 @@ def _find_cuda_include_path(repository_ctx, cuda_config): Returns: The path of the directory containing the CUDA headers. """ - cuda_toolkit_path = cuda_config.cuda_toolkit_path - for relative_path in CUDA_INCLUDE_PATHS: - if repository_ctx.path("%s/%scuda.h" % (cuda_toolkit_path, relative_path)).exists: - return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] - auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path) + cuda_toolkit_path = cuda_config.cuda_toolkit_path + for relative_path in CUDA_INCLUDE_PATHS: + if repository_ctx.path( + "%s/%scuda.h" % (cuda_toolkit_path, relative_path)).exists: + return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] + auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path) + def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir): - """Returns the path to the directory containing cudnn.h + """Returns the path to the directory containing cudnn.h Args: repository_ctx: The repository context. @@ -861,15 +910,17 @@ def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir): Returns: The path of the directory containing the cudnn header. """ - for relative_path in CUDA_INCLUDE_PATHS: - if repository_ctx.path("%s/%scudnn.h" % (cudnn_install_basedir, relative_path)).exists: - return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1] - if repository_ctx.path("/usr/include/cudnn.h").exists: - return "/usr/include" - auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir) + for relative_path in CUDA_INCLUDE_PATHS: + if repository_ctx.path( + "%s/%scudnn.h" % (cudnn_install_basedir, relative_path)).exists: + return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1] + if repository_ctx.path("/usr/include/cudnn.h").exists: + return "/usr/include" + auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir) + def _find_nvvm_libdevice_dir(repository_ctx, cuda_config): - """Returns the path to the directory containing libdevice in bitcode format. + """Returns the path to the directory containing libdevice in bitcode format. Args: repository_ctx: The repository context. @@ -878,19 +929,23 @@ def _find_nvvm_libdevice_dir(repository_ctx, cuda_config): Returns: The path of the directory containing the CUDA headers. """ - cuda_toolkit_path = cuda_config.cuda_toolkit_path - for libdevice_file in NVVM_LIBDEVICE_FILES: - for relative_path in NVVM_LIBDEVICE_PATHS: - if repository_ctx.path("%s/%s%s" % (cuda_toolkit_path, relative_path, libdevice_file)).exists: - return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] - auto_configure_fail("Cannot find libdevice*.bc files under %s" % cuda_toolkit_path) + cuda_toolkit_path = cuda_config.cuda_toolkit_path + for libdevice_file in NVVM_LIBDEVICE_FILES: + for relative_path in NVVM_LIBDEVICE_PATHS: + if repository_ctx.path("%s/%s%s" % (cuda_toolkit_path, relative_path, + libdevice_file)).exists: + return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] + auto_configure_fail( + "Cannot find libdevice*.bc files under %s" % cuda_toolkit_path) + def _cudart_static_linkopt(cpu_value): - """Returns additional platform-specific linkopts for cudart.""" - return "" if cpu_value == "Darwin" else "\"-lrt\"," + """Returns additional platform-specific linkopts for cudart.""" + return "" if cpu_value == "Darwin" else "\"-lrt\"," + def _get_cuda_config(repository_ctx): - """Detects and returns information about the CUDA installation on the system. + """Detects and returns information about the CUDA installation on the system. Args: repository_ctx: The repository context. @@ -904,35 +959,39 @@ def _get_cuda_config(repository_ctx): compute_capabilities: A list of the system's CUDA compute capabilities. cpu_value: The name of the host operating system. """ - cpu_value = get_cpu_value(repository_ctx) - cuda_toolkit_path = _cuda_toolkit_path(repository_ctx) - cuda_version = _cuda_version(repository_ctx, cuda_toolkit_path, cpu_value) - cudnn_install_basedir = _cudnn_install_basedir(repository_ctx) - cudnn_version = _cudnn_version(repository_ctx, cudnn_install_basedir, cpu_value) - return struct( - cuda_toolkit_path = cuda_toolkit_path, - cudnn_install_basedir = cudnn_install_basedir, - cuda_version = cuda_version, - cudnn_version = cudnn_version, - compute_capabilities = _compute_capabilities(repository_ctx), - cpu_value = cpu_value, - ) + cpu_value = get_cpu_value(repository_ctx) + toolkit_path = cuda_toolkit_path(repository_ctx) + cuda_version = _cuda_version(repository_ctx, toolkit_path, cpu_value) + cudnn_install_basedir = _cudnn_install_basedir(repository_ctx) + cudnn_version = _cudnn_version(repository_ctx, cudnn_install_basedir, + cpu_value) + return struct( + cuda_toolkit_path=toolkit_path, + cudnn_install_basedir=cudnn_install_basedir, + cuda_version=cuda_version, + cudnn_version=cudnn_version, + compute_capabilities=compute_capabilities(repository_ctx), + cpu_value=cpu_value, + ) + def _tpl(repository_ctx, tpl, substitutions = {}, out = None): - if not out: - out = tpl.replace(":", "/") - repository_ctx.template( - out, - Label("//third_party/gpus/%s.tpl" % tpl), - substitutions, - ) + if not out: + out = tpl.replace(":", "/") + repository_ctx.template( + out, + Label("//third_party/gpus/%s.tpl" % tpl), + substitutions, + ) + def _file(repository_ctx, label): - repository_ctx.template( - label.replace(":", "/"), - Label("//third_party/gpus/%s.tpl" % label), - {}, - ) + repository_ctx.template( + label.replace(":", "/"), + Label("//third_party/gpus/%s.tpl" % label), + {}, + ) + _DUMMY_CROSSTOOL_BZL_FILE = """ def error_gpu_disabled(): @@ -960,81 +1019,99 @@ error_gpu_disabled() """ def _create_dummy_repository(repository_ctx): - cpu_value = get_cpu_value(repository_ctx) + cpu_value = get_cpu_value(repository_ctx) - # Set up BUILD file for cuda/. - _tpl( - repository_ctx, - "cuda:build_defs.bzl", - { - "%{cuda_is_configured}": "False", - "%{cuda_extra_copts}": "[]", - }, - ) - _tpl( - repository_ctx, - "cuda:BUILD", - { - "%{cuda_driver_lib}": _lib_name("cuda", cpu_value), - "%{cudart_static_lib}": _lib_name( - "cudart_static", - cpu_value, - static = True, - ), - "%{cudart_static_linkopt}": _cudart_static_linkopt(cpu_value), - "%{cudart_lib}": _lib_name("cudart", cpu_value), - "%{cublas_lib}": _lib_name("cublas", cpu_value), - "%{cusolver_lib}": _lib_name("cusolver", cpu_value), - "%{cudnn_lib}": _lib_name("cudnn", cpu_value), - "%{cufft_lib}": _lib_name("cufft", cpu_value), - "%{curand_lib}": _lib_name("curand", cpu_value), - "%{cupti_lib}": _lib_name("cupti", cpu_value), - "%{cuda_include_genrules}": "", - "%{cuda_headers}": "", - }, - ) + # Set up BUILD file for cuda/. + _tpl( + repository_ctx, + "cuda:build_defs.bzl", + { + "%{cuda_is_configured}": "False", + "%{cuda_extra_copts}": "[]", + }, + ) + _tpl( + repository_ctx, + "cuda:BUILD", + { + "%{cuda_driver_lib}": + _lib_name("cuda", cpu_value), + "%{cudart_static_lib}": + _lib_name( + "cudart_static", + cpu_value, + static=True, + ), + "%{cudart_static_linkopt}": + _cudart_static_linkopt(cpu_value), + "%{cudart_lib}": + _lib_name("cudart", cpu_value), + "%{cublas_lib}": + _lib_name("cublas", cpu_value), + "%{cusolver_lib}": + _lib_name("cusolver", cpu_value), + "%{cudnn_lib}": + _lib_name("cudnn", cpu_value), + "%{cufft_lib}": + _lib_name("cufft", cpu_value), + "%{curand_lib}": + _lib_name("curand", cpu_value), + "%{cupti_lib}": + _lib_name("cupti", cpu_value), + "%{cuda_include_genrules}": + "", + "%{cuda_headers}": + "", + }, + ) - # Create dummy files for the CUDA toolkit since they are still required by - # tensorflow/core/platform/default/build_config:cuda. - repository_ctx.file("cuda/cuda/include/cuda.h", "") - repository_ctx.file("cuda/cuda/include/cublas.h", "") - repository_ctx.file("cuda/cuda/include/cudnn.h", "") - repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h", "") - repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cuda", cpu_value)) - repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart", cpu_value)) - repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart_static", cpu_value)) - repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublas", cpu_value)) - repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusolver", cpu_value)) - repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudnn", cpu_value)) - repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("curand", cpu_value)) - repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cufft", cpu_value)) - repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cupti", cpu_value)) + # Create dummy files for the CUDA toolkit since they are still required by + # tensorflow/core/platform/default/build_config:cuda. + repository_ctx.file("cuda/cuda/include/cuda.h", "") + repository_ctx.file("cuda/cuda/include/cublas.h", "") + repository_ctx.file("cuda/cuda/include/cudnn.h", "") + repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h", "") + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cuda", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart", cpu_value)) + repository_ctx.file( + "cuda/cuda/lib/%s" % _lib_name("cudart_static", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublas", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusolver", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudnn", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("curand", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cufft", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cupti", cpu_value)) - # Set up cuda_config.h, which is used by - # tensorflow/stream_executor/dso_loader.cc. - _tpl( - repository_ctx, - "cuda:cuda_config.h", - { - "%{cuda_version}": _DEFAULT_CUDA_VERSION, - "%{cudnn_version}": _DEFAULT_CUDNN_VERSION, - "%{cuda_compute_capabilities}": ",".join([ - "CudaVersion(\"%s\")" % c - for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES - ]), - "%{cuda_toolkit_path}": _DEFAULT_CUDA_TOOLKIT_PATH, - }, - "cuda/cuda/cuda_config.h", - ) + # Set up cuda_config.h, which is used by + # tensorflow/stream_executor/dso_loader.cc. + _tpl( + repository_ctx, + "cuda:cuda_config.h", + { + "%{cuda_version}": + _DEFAULT_CUDA_VERSION, + "%{cudnn_version}": + _DEFAULT_CUDNN_VERSION, + "%{cuda_compute_capabilities}": + ",".join([ + "CudaVersion(\"%s\")" % c + for c in _DEFAULT_CUDA_COMPUTE_CAPABILITIES + ]), + "%{cuda_toolkit_path}": + _DEFAULT_CUDA_TOOLKIT_PATH, + }, + "cuda/cuda/cuda_config.h", + ) + + # If cuda_configure is not configured to build with GPU support, and the user + # attempts to build with --config=cuda, add a dummy build rule to intercept + # this and fail with an actionable error message. + repository_ctx.file( + "crosstool/error_gpu_disabled.bzl", + _DUMMY_CROSSTOOL_BZL_FILE, + ) + repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE) - # If cuda_configure is not configured to build with GPU support, and the user - # attempts to build with --config=cuda, add a dummy build rule to intercept - # this and fail with an actionable error message. - repository_ctx.file( - "crosstool/error_gpu_disabled.bzl", - _DUMMY_CROSSTOOL_BZL_FILE, - ) - repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE) def _execute( repository_ctx, @@ -1042,35 +1119,35 @@ def _execute( error_msg = None, error_details = None, empty_stdout_fine = False): - """Executes an arbitrary shell command. + """Executes an arbitrary shell command. Args: repository_ctx: the repository_ctx object cmdline: list of strings, the command to execute error_msg: string, a summary of the error if the command fails error_details: string, details about the error or steps to fix it - empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise - it's an error - Return: - the result of repository_ctx.execute(cmdline) - """ - result = repository_ctx.execute(cmdline) - if result.stderr or not (empty_stdout_fine or result.stdout): - auto_configure_fail( - "\n".join([ - error_msg.strip() if error_msg else "Repository command failed", - result.stderr.strip(), - error_details if error_details else "", - ]), - ) - return result + empty_stdout_fine: bool, if True, an empty stdout result is fine, + otherwise it's an error + Return: the result of repository_ctx.execute(cmdline) + """ + result = repository_ctx.execute(cmdline) + if result.stderr or not (empty_stdout_fine or result.stdout): + auto_configure_fail( + "\n".join([ + error_msg.strip() if error_msg else "Repository command failed", + result.stderr.strip(), + error_details if error_details else "", + ]),) + return result + def _norm_path(path): - """Returns a path with '/' and remove the trailing slash.""" - path = path.replace("\\", "/") - if path[-1] == "/": - path = path[:-1] - return path + """Returns a path with '/' and remove the trailing slash.""" + path = path.replace("\\", "/") + if path[-1] == "/": + path = path[:-1] + return path + def symlink_genrule_for_dir( repository_ctx, @@ -1079,167 +1156,174 @@ def symlink_genrule_for_dir( genrule_name, src_files = [], dest_files = []): - """Returns a genrule to symlink(or copy if on Windows) a set of files. + """Returns a genrule to symlink(or copy if on Windows) a set of files. If src_dir is passed, files will be read from the given directory; otherwise we assume files are in src_files and dest_files """ - if src_dir != None: - src_dir = _norm_path(src_dir) - dest_dir = _norm_path(dest_dir) - files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines())) + if src_dir != None: + src_dir = _norm_path(src_dir) + dest_dir = _norm_path(dest_dir) + files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines())) - # Create a list with the src_dir stripped to use for outputs. - dest_files = files.replace(src_dir, "").splitlines() - src_files = files.splitlines() - command = [] - if not _is_windows(repository_ctx): - # We clear folders that might have been generated previously to avoid - # undesired inclusions - command.append('if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi') - command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi') - command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi') - command.append('if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi') - outs = [] - for i in range(len(dest_files)): - if dest_files[i] != "": - # If we have only one file to link we do not want to use the dest_dir, as - # $(@D) will include the full path to the file. - dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i] + # Create a list with the src_dir stripped to use for outputs. + dest_files = files.replace(src_dir, "").splitlines() + src_files = files.splitlines() + command = [] + if not _is_windows(repository_ctx): + # We clear folders that might have been generated previously to avoid + # undesired inclusions + command.append('if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi') + command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi') + command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi') + command.append('if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi') + outs = [] + for i in range(len(dest_files)): + if dest_files[i] != "": + # If we have only one file to link we do not want to use the dest_dir, as + # $(@D) will include the full path to the file. + dest = "$(@D)/" + dest_dir + dest_files[i] if len( + dest_files) != 1 else "$(@D)/" + dest_files[i] + + # Copy the headers to create a sandboxable setup. + cmd = "cp -f" + command.append(cmd + ' "%s" "%s"' % (src_files[i], dest)) + outs.append(' "' + dest_dir + dest_files[i] + '",') + genrule = _genrule( + src_dir, + genrule_name, + " && ".join(command), + "\n".join(outs), + ) + return genrule - # Copy the headers to create a sandboxable setup. - cmd = "cp -f" - command.append(cmd + ' "%s" "%s"' % (src_files[i], dest)) - outs.append(' "' + dest_dir + dest_files[i] + '",') - genrule = _genrule( - src_dir, - genrule_name, - " && ".join(command), - "\n".join(outs), - ) - return genrule def _genrule(src_dir, genrule_name, command, outs): - """Returns a string with a genrule. + """Returns a string with a genrule. Genrule executes the given command and produces the given outputs. """ - return ( - "genrule(\n" + - ' name = "' + - genrule_name + '",\n' + - " outs = [\n" + - outs + - "\n ],\n" + - ' cmd = """\n' + - command + - '\n """,\n' + - ")\n" - ) + return ( + "genrule(\n" + ' name = "' + genrule_name + '",\n' + " outs = [\n" + + outs + "\n ],\n" + ' cmd = """\n' + command + '\n """,\n' + ")\n") + def _read_dir(repository_ctx, src_dir): - """Returns a string with all files in a directory. + """Returns a string with all files in a directory. Finds all files inside a directory, traversing subfolders and following symlinks. The returned string contains the full path of all files separated by line breaks. """ - if _is_windows(repository_ctx): - src_dir = src_dir.replace("/", "\\") - find_result = _execute( - repository_ctx, - ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"], - empty_stdout_fine = True, - ) + if _is_windows(repository_ctx): + src_dir = src_dir.replace("/", "\\") + find_result = _execute( + repository_ctx, + ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"], + empty_stdout_fine=True, + ) + + # src_files will be used in genrule.outs where the paths must + # use forward slashes. + result = find_result.stdout.replace("\\", "/") + else: + find_result = _execute( + repository_ctx, + ["find", src_dir, "-follow", "-type", "f"], + empty_stdout_fine=True, + ) + result = find_result.stdout + return result - # src_files will be used in genrule.outs where the paths must - # use forward slashes. - result = find_result.stdout.replace("\\", "/") - else: - find_result = _execute( - repository_ctx, - ["find", src_dir, "-follow", "-type", "f"], - empty_stdout_fine = True, - ) - result = find_result.stdout - return result def _flag_enabled(repository_ctx, flag_name): - if flag_name in repository_ctx.os.environ: - value = repository_ctx.os.environ[flag_name].strip() - return value == "1" - return False + if flag_name in repository_ctx.os.environ: + value = repository_ctx.os.environ[flag_name].strip() + return value == "1" + return False + def _use_cuda_clang(repository_ctx): - return _flag_enabled(repository_ctx, "TF_CUDA_CLANG") + return _flag_enabled(repository_ctx, "TF_CUDA_CLANG") + def _compute_cuda_extra_copts(repository_ctx, compute_capabilities): - if _use_cuda_clang(repository_ctx): - capability_flags = ["--cuda-gpu-arch=sm_" + - cap.replace(".", "") for cap in compute_capabilities] - else: - # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc - capability_flags = [] - return str(capability_flags) + if _use_cuda_clang(repository_ctx): + capability_flags = [ + "--cuda-gpu-arch=sm_" + cap.replace(".", "") + for cap in compute_capabilities + ] + else: + # Capabilities are handled in the "crosstool_wrapper_driver_is_not_gcc" for nvcc + # TODO(csigg): Make this consistent with cuda clang and pass to crosstool. + capability_flags = [] + return str(capability_flags) + def _create_local_cuda_repository(repository_ctx): - """Creates the repository containing files set up to build with CUDA.""" - cuda_config = _get_cuda_config(repository_ctx) + """Creates the repository containing files set up to build with CUDA.""" + cuda_config = _get_cuda_config(repository_ctx) - cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config) - cudnn_header_dir = _find_cudnn_header_dir( - repository_ctx, - cuda_config.cudnn_install_basedir, - ) - cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config) - nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config) + cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config) + cudnn_header_dir = _find_cudnn_header_dir( + repository_ctx, + cuda_config.cudnn_install_basedir, + ) + cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config) + nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config) - # Set up symbolic links for the cuda toolkit by creating genrules to do - # symlinking. We create one genrule for each directory we want to track under - # cuda_toolkit_path - cuda_toolkit_path = cuda_config.cuda_toolkit_path - genrules = [symlink_genrule_for_dir( - repository_ctx, - cuda_include_path, - "cuda/include", - "cuda-include", - )] - genrules.append(symlink_genrule_for_dir( - repository_ctx, - nvvm_libdevice_dir, - "cuda/nvvm/libdevice", - "cuda-nvvm", - )) - genrules.append(symlink_genrule_for_dir( - repository_ctx, - cupti_header_dir, - "cuda/extras/CUPTI/include", - "cuda-extras", - )) + # Set up symbolic links for the cuda toolkit by creating genrules to do + # symlinking. We create one genrule for each directory we want to track under + # cuda_toolkit_path + cuda_toolkit_path = cuda_config.cuda_toolkit_path + genrules = [ + symlink_genrule_for_dir( + repository_ctx, + cuda_include_path, + "cuda/include", + "cuda-include", + ) + ] + genrules.append( + symlink_genrule_for_dir( + repository_ctx, + nvvm_libdevice_dir, + "cuda/nvvm/libdevice", + "cuda-nvvm", + )) + genrules.append( + symlink_genrule_for_dir( + repository_ctx, + cupti_header_dir, + "cuda/extras/CUPTI/include", + "cuda-extras", + )) - cuda_libs = _find_libs(repository_ctx, cuda_config) - cuda_lib_src = [] - cuda_lib_dest = [] - for lib in cuda_libs.values(): - cuda_lib_src.append(lib.path) - cuda_lib_dest.append("cuda/lib/" + lib.file_name) - genrules.append(symlink_genrule_for_dir( - repository_ctx, - None, - "", - "cuda-lib", - cuda_lib_src, - cuda_lib_dest, - )) + cuda_libs = _find_libs(repository_ctx, cuda_config) + cuda_lib_src = [] + cuda_lib_dest = [] + for lib in cuda_libs.values(): + cuda_lib_src.append(lib.path) + cuda_lib_dest.append("cuda/lib/" + lib.file_name) + genrules.append( + symlink_genrule_for_dir( + repository_ctx, + None, + "", + "cuda-lib", + cuda_lib_src, + cuda_lib_dest, + )) - # Set up the symbolic links for cudnn if cndnn was not installed to - # CUDA_TOOLKIT_PATH. - included_files = _read_dir(repository_ctx, cuda_include_path).replace( - cuda_include_path, - "", - ).splitlines() - if "/cudnn.h" not in included_files: - genrules.append(symlink_genrule_for_dir( + # Set up the symbolic links for cudnn if cndnn was not installed to + # CUDA_TOOLKIT_PATH. + included_files = _read_dir(repository_ctx, cuda_include_path).replace( + cuda_include_path, + "", + ).splitlines() + if "/cudnn.h" not in included_files: + genrules.append( + symlink_genrule_for_dir( repository_ctx, None, "cuda/include/", @@ -1247,204 +1331,229 @@ def _create_local_cuda_repository(repository_ctx): [cudnn_header_dir + "/cudnn.h"], ["cudnn.h"], )) - else: - genrules.append( - "filegroup(\n" + - ' name = "cudnn-include",\n' + - " srcs = [],\n" + - ")\n", - ) + else: + genrules.append( + "filegroup(\n" + ' name = "cudnn-include",\n' + " srcs = [],\n" + + ")\n",) - # Set up BUILD file for cuda/ - _tpl( - repository_ctx, - "cuda:build_defs.bzl", - { - "%{cuda_is_configured}": "True", - "%{cuda_extra_copts}": _compute_cuda_extra_copts( - repository_ctx, - cuda_config.compute_capabilities, - ), - }, - ) - _tpl( - repository_ctx, - "cuda:BUILD.windows" if _is_windows(repository_ctx) else "cuda:BUILD", - { - "%{cuda_driver_lib}": cuda_libs["cuda"].file_name, - "%{cudart_static_lib}": cuda_libs["cudart_static"].file_name, - "%{cudart_static_linkopt}": _cudart_static_linkopt( - cuda_config.cpu_value, - ), - "%{cudart_lib}": cuda_libs["cudart"].file_name, - "%{cublas_lib}": cuda_libs["cublas"].file_name, - "%{cusolver_lib}": cuda_libs["cusolver"].file_name, - "%{cudnn_lib}": cuda_libs["cudnn"].file_name, - "%{cufft_lib}": cuda_libs["cufft"].file_name, - "%{curand_lib}": cuda_libs["curand"].file_name, - "%{cupti_lib}": cuda_libs["cupti"].file_name, - "%{cuda_include_genrules}": "\n".join(genrules), - "%{cuda_headers}": ('":cuda-include",\n' + - ' ":cudnn-include",'), - }, - "cuda/BUILD", - ) + # Set up BUILD file for cuda/ + _tpl( + repository_ctx, + "cuda:build_defs.bzl", + { + "%{cuda_is_configured}": + "True", + "%{cuda_extra_copts}": + _compute_cuda_extra_copts( + repository_ctx, + cuda_config.compute_capabilities, + ), + }, + ) + _tpl( + repository_ctx, + "cuda:BUILD.windows" if _is_windows(repository_ctx) else "cuda:BUILD", + { + "%{cuda_driver_lib}": + cuda_libs["cuda"].file_name, + "%{cudart_static_lib}": + cuda_libs["cudart_static"].file_name, + "%{cudart_static_linkopt}": + _cudart_static_linkopt(cuda_config.cpu_value,), + "%{cudart_lib}": + cuda_libs["cudart"].file_name, + "%{cublas_lib}": + cuda_libs["cublas"].file_name, + "%{cusolver_lib}": + cuda_libs["cusolver"].file_name, + "%{cudnn_lib}": + cuda_libs["cudnn"].file_name, + "%{cufft_lib}": + cuda_libs["cufft"].file_name, + "%{curand_lib}": + cuda_libs["curand"].file_name, + "%{cupti_lib}": + cuda_libs["cupti"].file_name, + "%{cuda_include_genrules}": + "\n".join(genrules), + "%{cuda_headers}": ('":cuda-include",\n' + ' ":cudnn-include",' + ), + }, + "cuda/BUILD", + ) - is_cuda_clang = _use_cuda_clang(repository_ctx) + is_cuda_clang = _use_cuda_clang(repository_ctx) - should_download_clang = is_cuda_clang and _flag_enabled( - repository_ctx, - _TF_DOWNLOAD_CLANG, - ) - if should_download_clang: - download_clang(repository_ctx, "crosstool/extra_tools") + should_download_clang = is_cuda_clang and _flag_enabled( + repository_ctx, + _TF_DOWNLOAD_CLANG, + ) + if should_download_clang: + download_clang(repository_ctx, "crosstool/extra_tools") - # Set up crosstool/ - cc = find_cc(repository_ctx) - cc_fullpath = cc if not should_download_clang else "crosstool/" + cc + # Set up crosstool/ + cc = find_cc(repository_ctx) + cc_fullpath = cc if not should_download_clang else "crosstool/" + cc - host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath) - cuda_defines = {} - # Bazel sets '-B/usr/bin' flag to workaround build errors on RHEL (see - # https://github.com/bazelbuild/bazel/issues/760). - # However, this stops our custom clang toolchain from picking the provided - # LLD linker, so we're only adding '-B/usr/bin' when using non-downloaded - # toolchain. - # TODO: when bazel stops adding '-B/usr/bin' by default, remove this - # flag from the CROSSTOOL completely (see - # https://github.com/bazelbuild/bazel/issues/5634) - if should_download_clang: - cuda_defines["%{linker_bin_path_flag}"] = "" - else: - cuda_defines["%{linker_bin_path_flag}"] = 'flag: "-B/usr/bin"' + host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath) + cuda_defines = {} + # Bazel sets '-B/usr/bin' flag to workaround build errors on RHEL (see + # https://github.com/bazelbuild/bazel/issues/760). + # However, this stops our custom clang toolchain from picking the provided + # LLD linker, so we're only adding '-B/usr/bin' when using non-downloaded + # toolchain. + # TODO: when bazel stops adding '-B/usr/bin' by default, remove this + # flag from the CROSSTOOL completely (see + # https://github.com/bazelbuild/bazel/issues/5634) + if should_download_clang: + cuda_defines["%{linker_bin_path_flag}"] = "" + else: + cuda_defines["%{linker_bin_path_flag}"] = 'flag: "-B/usr/bin"' - if is_cuda_clang: - cuda_defines["%{host_compiler_path}"] = str(cc) - cuda_defines["%{host_compiler_warnings}"] = """ + if is_cuda_clang: + cuda_defines["%{host_compiler_path}"] = str(cc) + cuda_defines["%{host_compiler_warnings}"] = """ # Some parts of the codebase set -Werror and hit this warning, so # switch it off for now. flag: "-Wno-invalid-partial-specialization" """ - cuda_defines["%{host_compiler_includes}"] = host_compiler_includes - _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"}) - repository_ctx.file("crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "") - repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "") - repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.bat", "") - else: - cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc" - cuda_defines["%{host_compiler_warnings}"] = "" - - # nvcc has the system include paths built in and will automatically - # search them; we cannot work around that, so we add the relevant cuda - # system paths to the allowed compiler specific include paths. - cuda_defines["%{host_compiler_includes}"] = ( - host_compiler_includes + "\n" + - _cuda_include_path(repository_ctx, cuda_config) + - "\n cxx_builtin_include_directory: \"%s\"" % cupti_header_dir + - "\n cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir) - nvcc_path = str(repository_ctx.path("%s/bin/nvcc%s" % - ( - cuda_config.cuda_toolkit_path, - ".exe" if _is_windows(repository_ctx) else "", - ))) - _tpl( - repository_ctx, - "crosstool:BUILD", - { - "%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc", - "%{win_linker_files}": ":windows_msvc_wrapper_files", - }, - ) - wrapper_defines = { - "%{cpu_compiler}": str(cc), - "%{cuda_version}": cuda_config.cuda_version, - "%{nvcc_path}": nvcc_path, - "%{gcc_host_compiler_path}": str(cc), - "%{cuda_compute_capabilities}": ", ".join( - ["\"%s\"" % c for c in cuda_config.compute_capabilities], - ), - "%{nvcc_tmp_dir}": _get_nvcc_tmp_dir_for_windows(repository_ctx), - } - _tpl( - repository_ctx, - "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc", - wrapper_defines, - ) - _tpl( - repository_ctx, - "crosstool:windows/msvc_wrapper_for_nvcc.py", - wrapper_defines, - ) - _tpl( - repository_ctx, - "crosstool:windows/msvc_wrapper_for_nvcc.bat", - { - "%{python_binary}": _get_python_bin(repository_ctx), - }, - ) + cuda_defines["%{host_compiler_includes}"] = host_compiler_includes + _tpl(repository_ctx, "crosstool:BUILD", { + "%{linker_files}": ":empty", + "%{win_linker_files}": ":empty" + }) + repository_ctx.file( + "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "") + repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "") + repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.bat", "") + else: + cuda_defines[ + "%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc" + cuda_defines["%{host_compiler_warnings}"] = "" + # nvcc has the system include paths built in and will automatically + # search them; we cannot work around that, so we add the relevant cuda + # system paths to the allowed compiler specific include paths. + cuda_defines["%{host_compiler_includes}"] = ( + host_compiler_includes + "\n" + _cuda_include_path( + repository_ctx, cuda_config) + + "\n cxx_builtin_include_directory: \"%s\"" % cupti_header_dir + + "\n cxx_builtin_include_directory: \"%s\"" % cudnn_header_dir) + nvcc_path = str( + repository_ctx.path("%s/bin/nvcc%s" % ( + cuda_config.cuda_toolkit_path, + ".exe" if _is_windows(repository_ctx) else "", + ))) _tpl( repository_ctx, - "crosstool:CROSSTOOL", - cuda_defines + _get_win_cuda_defines(repository_ctx), - out = "crosstool/CROSSTOOL", - ) - - # Set up cuda_config.h, which is used by - # tensorflow/stream_executor/dso_loader.cc. - _tpl( - repository_ctx, - "cuda:cuda_config.h", + "crosstool:BUILD", { - "%{cuda_version}": cuda_config.cuda_version, - "%{cudnn_version}": cuda_config.cudnn_version, - "%{cuda_compute_capabilities}": ",".join( - [ - "CudaVersion(\"%s\")" % c - for c in cuda_config.compute_capabilities - ], - ), - "%{cuda_toolkit_path}": cuda_config.cuda_toolkit_path, + "%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc", + "%{win_linker_files}": ":windows_msvc_wrapper_files", }, - "cuda/cuda/cuda_config.h", ) + wrapper_defines = { + "%{cpu_compiler}": + str(cc), + "%{cuda_version}": + cuda_config.cuda_version, + "%{nvcc_path}": + nvcc_path, + "%{gcc_host_compiler_path}": + str(cc), + "%{cuda_compute_capabilities}": + ", ".join( + ["\"%s\"" % c for c in cuda_config.compute_capabilities],), + "%{nvcc_tmp_dir}": + _get_nvcc_tmp_dir_for_windows(repository_ctx), + } + _tpl( + repository_ctx, + "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc", + wrapper_defines, + ) + _tpl( + repository_ctx, + "crosstool:windows/msvc_wrapper_for_nvcc.py", + wrapper_defines, + ) + _tpl( + repository_ctx, + "crosstool:windows/msvc_wrapper_for_nvcc.bat", + { + "%{python_binary}": _get_python_bin(repository_ctx), + }, + ) + + _tpl( + repository_ctx, + "crosstool:CROSSTOOL", + cuda_defines + _get_win_cuda_defines(repository_ctx), + out="crosstool/CROSSTOOL", + ) + + # Set up cuda_config.h, which is used by + # tensorflow/stream_executor/dso_loader.cc. + _tpl( + repository_ctx, + "cuda:cuda_config.h", + { + "%{cuda_version}": + cuda_config.cuda_version, + "%{cudnn_version}": + cuda_config.cudnn_version, + "%{cuda_compute_capabilities}": + ",".join([ + "CudaVersion(\"%s\")" % c + for c in cuda_config.compute_capabilities + ],), + "%{cuda_toolkit_path}": + cuda_config.cuda_toolkit_path, + }, + "cuda/cuda/cuda_config.h", + ) + def _create_remote_cuda_repository(repository_ctx, remote_config_repo): - """Creates pointers to a remotely configured repo set up to build with CUDA.""" - _tpl( - repository_ctx, - "cuda:build_defs.bzl", - { - "%{cuda_is_configured}": "True", - "%{cuda_extra_copts}": _compute_cuda_extra_copts( - repository_ctx, - _compute_capabilities(repository_ctx), - ), - }, - ) - _tpl( - repository_ctx, - "cuda:remote.BUILD", - { - "%{remote_cuda_repo}": remote_config_repo, - }, - "cuda/BUILD", - ) - _tpl(repository_ctx, "crosstool:remote.BUILD", { - "%{remote_cuda_repo}": remote_config_repo, - }, "crosstool/BUILD") + """Creates pointers to a remotely configured repo set up to build with CUDA.""" + _tpl( + repository_ctx, + "cuda:build_defs.bzl", + { + "%{cuda_is_configured}": + "True", + "%{cuda_extra_copts}": + _compute_cuda_extra_copts( + repository_ctx, + compute_capabilities(repository_ctx), + ), + }, + ) + _tpl( + repository_ctx, + "cuda:remote.BUILD", + { + "%{remote_cuda_repo}": remote_config_repo, + }, + "cuda/BUILD", + ) + _tpl(repository_ctx, "crosstool:remote.BUILD", { + "%{remote_cuda_repo}": remote_config_repo, + }, "crosstool/BUILD") + def _cuda_autoconf_impl(repository_ctx): - """Implementation of the cuda_autoconf repository rule.""" - if not _enable_cuda(repository_ctx): - _create_dummy_repository(repository_ctx) - elif _TF_CUDA_CONFIG_REPO in repository_ctx.os.environ: - _create_remote_cuda_repository( - repository_ctx, - repository_ctx.os.environ[_TF_CUDA_CONFIG_REPO], - ) - else: - _create_local_cuda_repository(repository_ctx) + """Implementation of the cuda_autoconf repository rule.""" + if not _enable_cuda(repository_ctx): + _create_dummy_repository(repository_ctx) + elif _TF_CUDA_CONFIG_REPO in repository_ctx.os.environ: + _create_remote_cuda_repository( + repository_ctx, + repository_ctx.os.environ[_TF_CUDA_CONFIG_REPO], + ) + else: + _create_local_cuda_repository(repository_ctx) + cuda_configure = repository_rule( implementation = _cuda_autoconf_impl, diff --git a/third_party/nccl/LICENSE b/third_party/nccl/LICENSE index 146d9b765c5..b9585181860 100644 --- a/third_party/nccl/LICENSE +++ b/third_party/nccl/LICENSE @@ -1,203 +1,30 @@ -Copyright 2018 The TensorFlow Authors. All rights reserved. - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ + Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of NVIDIA CORPORATION, Lawrence Berkeley National + Laboratory, the U.S. Department of Energy, nor the names of their + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. - 1. Definitions. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018, The TensorFlow Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + The U.S. Department of Energy funded the development of this software + under subcontract 7078610 with Lawrence Berkeley National Laboratory. diff --git a/third_party/nccl/archive.BUILD b/third_party/nccl/archive.BUILD new file mode 100644 index 00000000000..f57f04c75ed --- /dev/null +++ b/third_party/nccl/archive.BUILD @@ -0,0 +1,179 @@ +# NVIDIA NCCL 2 +# A package of optimized primitives for collective multi-GPU communication. + +licenses(["restricted"]) + +exports_files(["LICENSE.txt"]) + +load( + "@local_config_nccl//:build_defs.bzl", + "device_link", + "gen_nccl_h", + "nccl_library", + "rdc_copts", +) +load( + "@local_config_cuda//cuda:build_defs.bzl", + "cuda_default_copts", +) + +# Generate the nccl.h header file. +gen_nccl_h( + name = "nccl_h", + output = "src/nccl.h", + template = "src/nccl.h.in", +) + +nccl_library( + name = "src_hdrs", + hdrs = [ + "src/nccl.h", + # src/include/common_coll.h #includes "collectives/collectives.h". + # All other #includes of collectives.h are patched in process_srcs. + "src/collectives/collectives.h", + ], + strip_include_prefix = "src", +) + +nccl_library( + name = "include_hdrs", + hdrs = glob(["src/include/*.h"]), + strip_include_prefix = "src/include", +) + +filegroup( + name = "device_hdrs", + srcs = glob(["src/collectives/device/*.h"]), +) + +filegroup( + name = "device_srcs", + srcs = [ + "src/collectives/device/all_gather.cu", + "src/collectives/device/all_reduce.cu", + "src/collectives/device/broadcast.cu", + "src/collectives/device/reduce.cu", + "src/collectives/device/reduce_scatter.cu", + ], +) + +nccl_library( + name = "sum", + srcs = [ + ":device_hdrs", + ":device_srcs", + ], + copts = ["-DNCCL_OP=0"] + rdc_copts(), + prefix = "sum_", + deps = [ + ":src_hdrs", + ":include_hdrs", + "@local_config_cuda//cuda:cuda_headers", + ], + linkstatic = True, +) + +nccl_library( + name = "prod", + srcs = [ + ":device_hdrs", + ":device_srcs", + ], + copts = ["-DNCCL_OP=1"] + rdc_copts(), + prefix = "_prod", + deps = [ + ":src_hdrs", + ":include_hdrs", + "@local_config_cuda//cuda:cuda_headers", + ], + linkstatic = True, +) + +nccl_library( + name = "min", + srcs = [ + ":device_hdrs", + ":device_srcs", + ], + copts = ["-DNCCL_OP=2"] + rdc_copts(), + prefix = "min_", + deps = [ + ":src_hdrs", + ":include_hdrs", + "@local_config_cuda//cuda:cuda_headers", + ], + linkstatic = True, +) + +nccl_library( + name = "max", + srcs = [ + ":device_hdrs", + ":device_srcs", + ], + copts = ["-DNCCL_OP=3"] + rdc_copts(), + prefix = "max_", + deps = [ + ":src_hdrs", + ":include_hdrs", + "@local_config_cuda//cuda:cuda_headers", + ], + linkstatic = True, +) + +nccl_library( + name = "functions", + srcs = [ + ":device_hdrs", + "src/collectives/device/functions.cu", + ], + copts = rdc_copts(), + deps = [ + ":src_hdrs", + ":include_hdrs", + "@local_config_cuda//cuda:cuda_headers", + ], + linkstatic = True, +) + +device_link( + name = "device_code", + srcs = [ + ":functions", + ":max", + ":min", + ":prod", + ":sum", + ], +) + +# Primary NCCL target. +nccl_library( + name = "nccl", + srcs = glob( + include = ["src/**/*.cu"], + # Exclude device-library code. + exclude = ["src/collectives/device/**"], + ) + [ + # Required for header inclusion checking (see + # http://docs.bazel.build/versions/master/be/c-cpp.html#hdrs). + # Files in src/ which #include "nccl.h" load it from there rather than + # from the virtual includes directory. + "src/nccl.h", + ], + hdrs = ["src/nccl.h"], + include_prefix = "third_party/nccl", + strip_include_prefix = "src", + copts = cuda_default_copts(), + deps = [ + ":device_code", + ":functions", + ":include_hdrs", + ":max", + ":min", + ":prod", + ":src_hdrs", + ":sum", + ], + visibility = ["//visibility:public"], +) diff --git a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl new file mode 100644 index 00000000000..ede1d3dad5d --- /dev/null +++ b/third_party/nccl/build_defs.bzl.tpl @@ -0,0 +1,210 @@ +"""Repository rule for NCCL.""" + +load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts") + +def _gen_nccl_h_impl(ctx): + """Creates nccl.h from a template.""" + ctx.actions.expand_template( + output = ctx.outputs.output, + template = ctx.file.template, + substitutions = { + "${nccl:Major}": "2", + "${nccl:Minor}": "3", + "${nccl:Patch}": "5", + "${nccl:Suffix}": "", + "${nccl:Version}": "2305", + }, + ) +gen_nccl_h = rule( + implementation = _gen_nccl_h_impl, + attrs = { + "template": attr.label(allow_single_file = True), + "output": attr.output(), + }, +) +"""Creates the NCCL header file.""" + + +def _process_srcs_impl(ctx): + """Appends .cc to .cu files, patches include directives.""" + files = [] + for src in ctx.files.srcs: + if not src.is_source: + # Process only once, specifically "src/nccl.h". + files.append(src) + continue + name = src.basename + if src.extension == "cu": + name = ctx.attr.prefix + name + ".cc" + file = ctx.actions.declare_file(name, sibling = src) + ctx.actions.expand_template( + output = file, + template = src, + substitutions = { + "\"collectives.h": "\"collectives/collectives.h", + "\"../collectives.h": "\"collectives/collectives.h", + "#if __CUDACC_VER_MAJOR__": + "#if defined __CUDACC_VER_MAJOR__ && __CUDACC_VER_MAJOR__", + # Substitutions are applied in order. + "std::nullptr_t": "nullptr_t", + "nullptr_t": "std::nullptr_t", + }, + ) + files.append(file) + return [DefaultInfo(files = depset(files))] +_process_srcs = rule( + implementation = _process_srcs_impl, + attrs = { + "srcs": attr.label_list(allow_files = True), + "prefix": attr.string(default = ""), + }, +) +"""Processes the NCCL srcs so they can be compiled with bazel and clang.""" + + +def nccl_library(name, srcs=None, hdrs=None, prefix=None, **kwargs): + """Processes the srcs and hdrs and creates a cc_library.""" + + _process_srcs( + name = name + "_srcs", + srcs = srcs, + prefix = prefix, + ) + _process_srcs( + name = name + "_hdrs", + srcs = hdrs, + ) + + native.cc_library( + name = name, + srcs = [name + "_srcs"] if srcs else [], + hdrs = [name + "_hdrs"] if hdrs else [], + **kwargs + ) + + +def rdc_copts(): + """Returns copts for compiling relocatable device code.""" + + # The global functions can not have a lower register count than the + # device functions. This is enforced by setting a fixed register count. + # https://github.com/NVIDIA/nccl/blob/f93fe9bfd94884cec2ba711897222e0df5569a53/makefiles/common.mk#L48 + maxrregcount = "-maxrregcount=96" + + return cuda_default_copts() + select({ + "@local_config_cuda//cuda:using_nvcc": [ + "-nvcc_options", + "relocatable-device-code=true", + "-nvcc_options", + "ptxas-options=" + maxrregcount, + ], + "@local_config_cuda//cuda:using_clang": [ + "-fcuda-rdc", + "-Xcuda-ptxas", + maxrregcount, + ], + "//conditions:default": [], + }) + ["-fvisibility=hidden"] + + +def _filter_impl(ctx): + suffix = ctx.attr.suffix + files = [src for src in ctx.files.srcs if src.path.endswith(suffix)] + return [DefaultInfo(files = depset(files))] +_filter = rule( + implementation = _filter_impl, + attrs = { + "srcs": attr.label_list(allow_files = True), + "suffix": attr.string(), + }, +) +"""Filters the srcs to the ones ending with suffix.""" + + +def _gen_link_src_impl(ctx): + ctx.actions.expand_template( + output = ctx.outputs.output, + template = ctx.file.template, + substitutions = { + "REGISTERLINKBINARYFILE": '"%s"' % ctx.file.register_hdr.short_path, + "FATBINFILE": '"%s"' % ctx.file.fatbin_hdr.short_path, + }, + ) +_gen_link_src = rule( + implementation = _gen_link_src_impl, + attrs = { + "register_hdr": attr.label(allow_single_file = True), + "fatbin_hdr": attr.label(allow_single_file = True), + "template": attr.label(allow_single_file = True), + "output": attr.output(), + }, +) +"""Patches the include directives for the link.stub file.""" + + +def device_link(name, srcs): + """Links seperately compiled relocatable device code into a cc_library.""" + + # From .a and .pic.a archives, just use the latter. + _filter( + name = name + "_pic_a", + srcs = srcs, + suffix = ".pic.a", + ) + + # Device-link to cubins for each architecture. + images = [] + cubins = [] + for arch in %{gpu_architectures}: + cubin = "%s_%s.cubin" % (name, arch) + register_hdr = "%s_%s.h" % (name, arch) + nvlink = "@local_config_nccl//:nvlink" + cmd = ("$(location %s) --cpu-arch=X86_64 " % nvlink + + "--arch=%s $(SRCS) " % arch + + "--register-link-binaries=$(location %s) " % register_hdr + + "--output-file=$(location %s)" % cubin) + native.genrule( + name = "%s_%s" % (name, arch), + outs = [register_hdr, cubin], + srcs = [name + "_pic_a"], + cmd = cmd, + tools = [nvlink], + ) + images.append("--image=profile=%s,file=$(location %s)" % (arch, cubin)) + cubins.append(cubin) + + # Generate fatbin header from all cubins. + fatbin_hdr = name + ".fatbin.h" + fatbinary = "@local_config_nccl//:cuda/bin/fatbinary" + cmd = ("PATH=$$CUDA_TOOLKIT_PATH/bin:$$PATH " + # for bin2c + "$(location %s) -64 --cmdline=--compile-only --link " % fatbinary + + "--compress-all %s --create=%%{name}.fatbin " % " ".join(images) + + "--embedded-fatbin=$@") + native.genrule( + name = name + "_fatbin_h", + outs = [fatbin_hdr], + srcs = cubins, + cmd = cmd, + tools = [fatbinary], + ) + + # Generate the source file #including the headers generated above. + _gen_link_src( + name = name + "_cc", + # Include just the last one, they are equivalent. + register_hdr = register_hdr, + fatbin_hdr = fatbin_hdr, + template = "@local_config_nccl//:cuda/bin/crt/link.stub", + output = name + ".cc", + ) + + # Compile the source file into the cc_library. + native.cc_library( + name = name, + srcs = [name + "_cc"], + textual_hdrs = [register_hdr, fatbin_hdr], + deps = [ + "@local_config_cuda//cuda:cuda_headers", + "@local_config_cuda//cuda:cudart_static", + ], + ) diff --git a/third_party/nccl/nccl_archive.BUILD b/third_party/nccl/nccl_archive.BUILD deleted file mode 100644 index a05899e38d5..00000000000 --- a/third_party/nccl/nccl_archive.BUILD +++ /dev/null @@ -1,68 +0,0 @@ -# NVIDIA nccl -# A package of optimized primitives for collective multi-GPU communication. - -licenses(["notice"]) # BSD - -exports_files(["LICENSE.txt"]) - -load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts", "if_cuda") - -SRCS = [ - "src/all_gather.cu", - "src/all_reduce.cu", - "src/broadcast.cu", - "src/core.cu", - "src/libwrap.cu", - "src/reduce.cu", - "src/reduce_scatter.cu", -] - -# Copy .cu to .cu.cc so they can be in srcs of cc_library. -[ - genrule( - name = "gen_" + src, - srcs = [src], - outs = [src + ".cc"], - cmd = "cp $(location " + src + ") $(location " + src + ".cc)", - ) - for src in SRCS -] - -SRCS_CU_CC = [src + ".cc" for src in SRCS] - -cc_library( - name = "nccl", - srcs = if_cuda(SRCS_CU_CC + glob(["src/*.h"])), - hdrs = if_cuda(["src/nccl.h"]), - copts = [ - "-DCUDA_MAJOR=0", - "-DCUDA_MINOR=0", - "-DNCCL_MAJOR=0", - "-DNCCL_MINOR=0", - "-DNCCL_PATCH=0", - "-Iexternal/nccl_archive/src", - "-O3", - ] + cuda_default_copts(), - include_prefix = "third_party/nccl", - linkopts = select({ - "@org_tensorflow//tensorflow:android": [ - "-pie", - ], - "@org_tensorflow//tensorflow:darwin": [ - "-Wl,-framework", - "-Wl,CoreFoundation", - "-Wl,-framework", - "-Wl,Security", - ], - "@org_tensorflow//tensorflow:ios": [], - "@org_tensorflow//tensorflow:windows": [ - "-DEFAULTLIB:ws2_32.lib", - ], - "//conditions:default": [ - "-lrt", - ], - }), - strip_include_prefix = "src", - visibility = ["//visibility:public"], - deps = ["@local_config_cuda//cuda:cuda_headers"], -) diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index d78fe8f3aab..7f00df09620 100644 --- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -11,12 +11,16 @@ load( "//third_party/gpus:cuda_configure.bzl", "auto_configure_fail", + "compute_capabilities", + "cuda_toolkit_path", "find_cuda_define", "matches_version", ) -_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH" +_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH" _NCCL_HDR_PATH = "NCCL_HDR_PATH" +_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH" +_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES" _TF_NCCL_VERSION = "TF_NCCL_VERSION" _TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO" @@ -37,6 +41,12 @@ cc_library( """ _NCCL_ARCHIVE_BUILD_CONTENT = """ +exports_files([ + "cuda/bin/crt/link.stub", + "cuda/bin/fatbinary", + "nvlink", +]) + filegroup( name = "LICENSE", data = ["@nccl_archive//:LICENSE.txt"], @@ -50,113 +60,125 @@ alias( ) """ -# Local build results in dynamic link and the license should not be included. -_NCCL_REMOTE_BUILD_TEMPLATE = Label("//third_party/nccl:remote.BUILD.tpl") -_NCCL_LOCAL_BUILD_TEMPLATE = Label("//third_party/nccl:system.BUILD.tpl") +def _label(file): + return Label("//third_party/nccl:{}".format(file)) def _find_nccl_header(repository_ctx, nccl_install_path): - """Finds the NCCL header on the system. + """Finds the NCCL header on the system. - Args: - repository_ctx: The repository context. - nccl_install_path: The NCCL library install directory. - - Returns: - The path to the NCCL header. - """ - header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path) - if not header_path.exists: - auto_configure_fail("Cannot find %s" % str(header_path)) - return header_path + Args: + repository_ctx: The repository context. + nccl_install_path: The NCCL library install directory. + Returns: + The path to the NCCL header. + """ + header_path = repository_ctx.path("%s/include/nccl.h" % nccl_install_path) + if not header_path.exists: + auto_configure_fail("Cannot find %s" % str(header_path)) + return header_path def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version): - """Checks whether the header file matches the specified version of NCCL. + """Checks whether the header file matches the specified version of NCCL. - Args: - repository_ctx: The repository context. - nccl_install_path: The NCCL library install directory. - nccl_version: The expected NCCL version. - - Returns: - A string containing the library version of NCCL. - """ - header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path) - if not header_path.exists: - header_path = _find_nccl_header(repository_ctx, nccl_install_path) - header_dir = str(header_path.realpath.dirname) - major_version = find_cuda_define(repository_ctx, header_dir, "nccl.h", - _DEFINE_NCCL_MAJOR) - minor_version = find_cuda_define(repository_ctx, header_dir, "nccl.h", - _DEFINE_NCCL_MINOR) - patch_version = find_cuda_define(repository_ctx, header_dir, "nccl.h", - _DEFINE_NCCL_PATCH) - header_version = "%s.%s.%s" % (major_version, minor_version, patch_version) - if not matches_version(nccl_version, header_version): - auto_configure_fail( - ("NCCL library version detected from %s/nccl.h (%s) does not match " + - "TF_NCCL_VERSION (%s). To fix this rerun configure again.") % - (header_dir, header_version, nccl_version)) - - -def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version): - """Finds the given NCCL library on the system. - - Args: - repository_ctx: The repository context. - nccl_install_path: The NCCL library installation directory. - nccl_version: The version of NCCL library files as returned - by _nccl_version. - - Returns: - The path to the NCCL library. - """ - lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path, - nccl_version)) - if not lib_path.exists: - auto_configure_fail("Cannot find NCCL library %s" % str(lib_path)) - return lib_path + Args: + repository_ctx: The repository context. + nccl_install_path: The NCCL library install directory. + nccl_hdr_path: The NCCL header path. + nccl_version: The expected NCCL version. + Returns: + A string containing the library version of NCCL. + """ + header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path) + if not header_path.exists: + header_path = _find_nccl_header(repository_ctx, nccl_install_path) + header_dir = str(header_path.realpath.dirname) + major_version = find_cuda_define( + repository_ctx, + header_dir, + "nccl.h", + _DEFINE_NCCL_MAJOR, + ) + minor_version = find_cuda_define( + repository_ctx, + header_dir, + "nccl.h", + _DEFINE_NCCL_MINOR, + ) + patch_version = find_cuda_define( + repository_ctx, + header_dir, + "nccl.h", + _DEFINE_NCCL_PATCH, + ) + header_version = "%s.%s.%s" % (major_version, minor_version, patch_version) + if not matches_version(nccl_version, header_version): + auto_configure_fail( + ("NCCL library version detected from %s/nccl.h (%s) does not match " + + "TF_NCCL_VERSION (%s). To fix this rerun configure again.") % + (header_dir, header_version, nccl_version), + ) def _nccl_configure_impl(repository_ctx): - """Implementation of the nccl_configure repository rule.""" - if _TF_NCCL_VERSION not in repository_ctx.os.environ: - # Add a dummy build file to make bazel query happy. - repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT) - return + """Implementation of the nccl_configure repository rule.""" + if _TF_NCCL_VERSION not in repository_ctx.os.environ: + # Add a dummy build file to make bazel query happy. + repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT) + return - if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ: - # Forward to the pre-configured remote repository. - repository_ctx.template("BUILD", _NCCL_REMOTE_BUILD_TEMPLATE, { - "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO], - }) - return + if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ: + # Forward to the pre-configured remote repository. + repository_ctx.template("BUILD", _label("remote.BUILD.tpl"), { + "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO], + }) + return - nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip() - if matches_version("1", nccl_version): - # Alias to GitHub target from @nccl_archive. - if not matches_version(nccl_version, "1.3"): - auto_configure_fail( - "NCCL from GitHub must use version 1.3 (got %s)" % nccl_version) - repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) - else: - # Create target for locally installed NCCL. - nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip() - nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip() - _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version) - repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, { - "%{version}": nccl_version, - "%{install_path}": nccl_install_path, - "%{hdr_path}": nccl_hdr_path, - }) + nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip() + if nccl_version == "": + # Alias to open source build from @nccl_archive. + repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) + # TODO(csigg): implement and reuse in cuda_configure.bzl. + gpu_architectures = [ + "sm_" + capability.replace(".", "") + for capability in compute_capabilities(repository_ctx) + ] + + # Round-about way to make the list unique. + gpu_architectures = dict(zip(gpu_architectures, gpu_architectures)).keys() + repository_ctx.template("build_defs.bzl", _label("build_defs.bzl.tpl"), { + "%{gpu_architectures}": str(gpu_architectures), + }) + + repository_ctx.symlink(cuda_toolkit_path(repository_ctx), "cuda") + + # Temporary work-around for setups which symlink ptxas to a newer + # version. The versions of nvlink and ptxas need to agree, so we find + # nvlink next to the real location of ptxas. This is only temporary and + # will be removed again soon. + nvlink_dir = repository_ctx.path("cuda/bin/ptxas").realpath.dirname + repository_ctx.symlink(nvlink_dir.get_child("nvlink"), "nvlink") + else: + # Create target for locally installed NCCL. + nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip() + nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip() + _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version) + repository_ctx.template("BUILD", _label("system.BUILD.tpl"), { + "%{version}": nccl_version, + "%{install_path}": nccl_install_path, + "%{hdr_path}": nccl_hdr_path, + }) nccl_configure = repository_rule( - implementation=_nccl_configure_impl, - environ=[ - _NCCL_INSTALL_PATH, + implementation = _nccl_configure_impl, + environ = [ + _CUDA_TOOLKIT_PATH, _NCCL_HDR_PATH, + _NCCL_INSTALL_PATH, _TF_NCCL_VERSION, + _TF_CUDA_COMPUTE_CAPABILITIES, + _TF_NCCL_CONFIG_REPO, ], ) """Detects and configures the NCCL configuration.