From b51f4c97e899e7663b6cf39b9b8da41540b06e4c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 6 Sep 2018 11:41:59 -0700 Subject: [PATCH] Fix nccl for remote builds. Instead of symlinking the install dir, copy the two files we need. Symlinking a system dir like /usr is generally problematic as it can quickly lead to miscompiles for unrelated reasons. Furthermore, bazel will consider it an error if /usr is linked in and contains a recursive symlink in /usr/bin/X11 -> . PiperOrigin-RevId: 211842260 --- third_party/nccl/BUILD | 0 third_party/nccl/nccl_configure.bzl | 35 ++++++++++++----------------- third_party/nccl/remote.BUILD.tpl | 6 +++++ third_party/nccl/system.BUILD.tpl | 26 +++++++++++++++++++++ 4 files changed, 46 insertions(+), 21 deletions(-) create mode 100644 third_party/nccl/BUILD create mode 100644 third_party/nccl/remote.BUILD.tpl create mode 100644 third_party/nccl/system.BUILD.tpl diff --git a/third_party/nccl/BUILD b/third_party/nccl/BUILD new file mode 100644 index 00000000000..e69de29bb2d diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index 5d1ebf06867..ce9447096ec 100644 --- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -16,6 +16,7 @@ load( _NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH" _TF_NCCL_VERSION = "TF_NCCL_VERSION" +_TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO" _DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR" _DEFINE_NCCL_MINOR = "#define NCCL_MINOR" @@ -48,25 +49,8 @@ alias( """ # Local build results in dynamic link and the license should not be included. -_NCCL_LOCAL_BUILD_TEMPLATE = """ -filegroup( - name = "LICENSE", - visibility = ["//visibility:public"], -) - -cc_library( - name = "nccl", - srcs = ["nccl/lib/libnccl.so.%s"], - hdrs = ["nccl/include/nccl.h"], - include_prefix = "third_party/nccl", - strip_include_prefix = "nccl/include", - deps = [ - "@local_config_cuda//cuda:cuda_headers", - ], - visibility = ["//visibility:public"], -) -""" - +_NCCL_REMOTE_BUILD_TEMPLATE = Label("//third_party/nccl:remote.BUILD.tpl") +_NCCL_LOCAL_BUILD_TEMPLATE = Label("//third_party/nccl:system.BUILD.tpl") def _find_nccl_header(repository_ctx, nccl_install_path): """Finds the NCCL header on the system. @@ -137,6 +121,13 @@ def _nccl_configure_impl(repository_ctx): repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT) return + if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ: + # Forward to the pre-configured remote repository. + repository_ctx.template("BUILD", _NCCL_REMOTE_BUILD_TEMPLATE, { + "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO], + }) + return + nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip() if matches_version("1", nccl_version): # Alias to GitHub target from @nccl_archive. @@ -148,8 +139,10 @@ def _nccl_configure_impl(repository_ctx): # Create target for locally installed NCCL. nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip() _check_nccl_version(repository_ctx, nccl_install_path, nccl_version) - repository_ctx.symlink(nccl_install_path, "nccl") - repository_ctx.file("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE % nccl_version) + repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, { + "%{version}": nccl_version, + "%{install_path}": nccl_install_path, + }) nccl_configure = repository_rule( diff --git a/third_party/nccl/remote.BUILD.tpl b/third_party/nccl/remote.BUILD.tpl new file mode 100644 index 00000000000..d66fc5563d1 --- /dev/null +++ b/third_party/nccl/remote.BUILD.tpl @@ -0,0 +1,6 @@ +licenses(["restricted"]) + +package(default_visibility = ["//visibility:public"]) + +alias(name="LICENSE", actual = "%{target}:LICENSE") +alias(name = "nccl", actual = "%{target}:nccl") diff --git a/third_party/nccl/system.BUILD.tpl b/third_party/nccl/system.BUILD.tpl new file mode 100644 index 00000000000..7ca835dedfc --- /dev/null +++ b/third_party/nccl/system.BUILD.tpl @@ -0,0 +1,26 @@ +filegroup( + name = "LICENSE", + visibility = ["//visibility:public"], +) + +cc_library( + name = "nccl", + srcs = ["libnccl.so.%{version}"], + hdrs = ["nccl.h"], + include_prefix = "third_party/nccl", + deps = [ + "@local_config_cuda//cuda:cuda_headers", + ], + visibility = ["//visibility:public"], +) + +genrule( + name = "nccl-files", + outs = [ + "libnccl.so.%{version}", + "nccl.h", + ], + cmd = """cp "%{install_path}/include/nccl.h" "$(@D)/nccl.h" && + cp "%{install_path}/lib/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """, +) +