Fix nccl for remote builds.

Instead of symlinking the install dir, copy the two files we need.
Symlinking a system dir like /usr is generally problematic as it can quickly
lead to miscompiles for unrelated reasons. Furthermore, bazel will consider
it an error if /usr is linked in and contains a recursive symlink in
/usr/bin/X11 -> .

PiperOrigin-RevId: 211842260
This commit is contained in:
A. Unique TensorFlower 2018-09-06 11:41:59 -07:00 committed by TensorFlower Gardener
parent 0d76eadeb2
commit b51f4c97e8
4 changed files with 46 additions and 21 deletions

0
third_party/nccl/BUILD vendored Normal file
View File

View File

@ -16,6 +16,7 @@ load(
_NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH"
_TF_NCCL_VERSION = "TF_NCCL_VERSION"
_TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO"
_DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR"
_DEFINE_NCCL_MINOR = "#define NCCL_MINOR"
@ -48,25 +49,8 @@ alias(
"""
# Local build results in dynamic link and the license should not be included.
_NCCL_LOCAL_BUILD_TEMPLATE = """
filegroup(
name = "LICENSE",
visibility = ["//visibility:public"],
)
cc_library(
name = "nccl",
srcs = ["nccl/lib/libnccl.so.%s"],
hdrs = ["nccl/include/nccl.h"],
include_prefix = "third_party/nccl",
strip_include_prefix = "nccl/include",
deps = [
"@local_config_cuda//cuda:cuda_headers",
],
visibility = ["//visibility:public"],
)
"""
_NCCL_REMOTE_BUILD_TEMPLATE = Label("//third_party/nccl:remote.BUILD.tpl")
_NCCL_LOCAL_BUILD_TEMPLATE = Label("//third_party/nccl:system.BUILD.tpl")
def _find_nccl_header(repository_ctx, nccl_install_path):
"""Finds the NCCL header on the system.
@ -137,6 +121,13 @@ def _nccl_configure_impl(repository_ctx):
repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT)
return
if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ:
# Forward to the pre-configured remote repository.
repository_ctx.template("BUILD", _NCCL_REMOTE_BUILD_TEMPLATE, {
"%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO],
})
return
nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip()
if matches_version("1", nccl_version):
# Alias to GitHub target from @nccl_archive.
@ -148,8 +139,10 @@ def _nccl_configure_impl(repository_ctx):
# Create target for locally installed NCCL.
nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip()
_check_nccl_version(repository_ctx, nccl_install_path, nccl_version)
repository_ctx.symlink(nccl_install_path, "nccl")
repository_ctx.file("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE % nccl_version)
repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, {
"%{version}": nccl_version,
"%{install_path}": nccl_install_path,
})
nccl_configure = repository_rule(

6
third_party/nccl/remote.BUILD.tpl vendored Normal file
View File

@ -0,0 +1,6 @@
licenses(["restricted"])
package(default_visibility = ["//visibility:public"])
alias(name="LICENSE", actual = "%{target}:LICENSE")
alias(name = "nccl", actual = "%{target}:nccl")

26
third_party/nccl/system.BUILD.tpl vendored Normal file
View File

@ -0,0 +1,26 @@
filegroup(
name = "LICENSE",
visibility = ["//visibility:public"],
)
cc_library(
name = "nccl",
srcs = ["libnccl.so.%{version}"],
hdrs = ["nccl.h"],
include_prefix = "third_party/nccl",
deps = [
"@local_config_cuda//cuda:cuda_headers",
],
visibility = ["//visibility:public"],
)
genrule(
name = "nccl-files",
outs = [
"libnccl.so.%{version}",
"nccl.h",
],
cmd = """cp "%{install_path}/include/nccl.h" "$(@D)/nccl.h" &&
cp "%{install_path}/lib/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """,
)