From a2323c1b1f0f857fd40fff7a481dfa300b8f7e02 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 30 Oct 2019 09:38:02 -0700
Subject: [PATCH] Allow using a sysroot and setting the cuda toolkit path when
 compiling with clang.

PiperOrigin-RevId: 277522657
Change-Id: I26364457c3e7acc5ae68681fb70f670bbd24a048
---
 third_party/gpus/crosstool/BUILD.tpl          |  2 +
 .../crosstool/cc_toolchain_config.bzl.tpl     | 32 ++++++++++++++-
 third_party/gpus/cuda_configure.bzl           | 40 ++++++++++++++++---
 .../toolchains/preconfig/generate/BUILD       | 22 +++++-----
 .../preconfig/generate/generate.bzl           |  8 ++--
 .../toolchains/preconfig/generate/generate.sh |  4 +-
 6 files changed, 86 insertions(+), 22 deletions(-)

diff --git a/third_party/gpus/crosstool/BUILD.tpl b/third_party/gpus/crosstool/BUILD.tpl
index 9fe46bbe649..ab77276a9ff 100644
--- a/third_party/gpus/crosstool/BUILD.tpl
+++ b/third_party/gpus/crosstool/BUILD.tpl
@@ -64,6 +64,8 @@ cc_toolchain_config(
     host_compiler_warnings = [%{host_compiler_warnings}],
     host_unfiltered_compile_flags = [%{unfiltered_compile_flags}],
     linker_bin_path = "%{linker_bin_path}",
+    builtin_sysroot = "%{builtin_sysroot}",
+    cuda_path = "%{cuda_toolkit_path}",
 )
 
 cc_toolchain(
diff --git a/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl b/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl
index 282ba08cda5..3d4d41aa2b1 100644
--- a/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl
+++ b/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl
@@ -114,7 +114,7 @@ def _impl(ctx):
 
     cc_target_os = None
 
-    builtin_sysroot = None
+    builtin_sysroot = ctx.attr.builtin_sysroot
 
     all_link_actions = [
         ACTION_NAMES.cpp_link_executable,
@@ -1065,6 +1065,32 @@ def _impl(ctx):
         ],
     )
 
+    cuda_path_feature = feature(  # Appends --cuda-path=<cuda_path attr> to the actions listed below.
+        name = "cuda_path",
+        enabled = True,  # On by default; it is only added to `features` when cuda_path is non-empty.
+        flag_sets = [
+            flag_set(
+                actions = [  # Both compile-type and link actions receive the flag.
+                    ACTION_NAMES.assemble,
+                    ACTION_NAMES.preprocess_assemble,
+                    ACTION_NAMES.c_compile,
+                    ACTION_NAMES.cpp_compile,
+                    ACTION_NAMES.cpp_header_parsing,
+                    ACTION_NAMES.cpp_module_compile,
+                    ACTION_NAMES.cpp_module_codegen,
+                    ACTION_NAMES.cpp_link_executable,
+                    ACTION_NAMES.cpp_link_dynamic_library,
+                    ACTION_NAMES.cpp_link_nodeps_dynamic_library,
+                ],
+                flag_groups = [
+                    flag_group(
+                        flags = ["--cuda-path=" + ctx.attr.cuda_path],  # Value is the rule's `cuda_path` string attribute.
+                    ),
+                ],
+            ),
+        ],
+    )
+
     def_file_feature = feature(
         name = "def_file",
         flag_sets = [
@@ -1313,6 +1339,8 @@ def _impl(ctx):
             supports_dynamic_linker_feature,
             supports_pic_feature,
         ]
+        if ctx.attr.cuda_path:
+            features += [cuda_path_feature]
     elif (ctx.attr.cpu == "darwin"):
         features = [
             cpp11_feature,
@@ -1472,6 +1500,8 @@ cc_toolchain_config = rule(
         "host_compiler_warnings": attr.string_list(),
         "host_unfiltered_compile_flags": attr.string_list(),
         "linker_bin_path": attr.string(),
+        "builtin_sysroot": attr.string(),
+        "cuda_path": attr.string(),
         "msvc_cl_path": attr.string(default = "msvc_not_used"),
         "msvc_env_include": attr.string(default = "msvc_not_used"),
         "msvc_env_lib": attr.string(default = "msvc_not_used"),
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 129f56df87d..026a3a08757 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -7,6 +7,7 @@
   * `TF_CUDA_CLANG`: Whether to use clang as a cuda compiler.
   * `CLANG_CUDA_COMPILER_PATH`: The clang compiler path that will be used for
     both host and device code compilation if TF_CUDA_CLANG is 1.
+  * `TF_SYSROOT`: The sysroot to use when compiling.
   * `TF_DOWNLOAD_CLANG`: Whether to download a recent release of clang
     compiler and use it to build tensorflow. When this option is set
     CLANG_CUDA_COMPILER_PATH is ignored.
@@ -40,6 +41,7 @@ load(
 _GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
 _GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX"
 _CLANG_CUDA_COMPILER_PATH = "CLANG_CUDA_COMPILER_PATH"
+_TF_SYSROOT = "TF_SYSROOT"
 _CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH"
 _TF_CUDA_VERSION = "TF_CUDA_VERSION"
 _TF_CUDNN_VERSION = "TF_CUDNN_VERSION"
@@ -275,13 +277,17 @@ def _normalize_include_path(repository_ctx, path):
         return path[len(crosstool_folder) + 1:]
     return path
 
-def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
+def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sysroot):
     """Compute the list of default C or C++ include directories."""
     if lang_is_cpp:
         lang = "c++"
     else:
         lang = "c"
-    result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"])
+    sysroot = []
+    if tf_sysroot:
+        sysroot += ["--sysroot", tf_sysroot]
+    result = repository_ctx.execute([cc, "-E", "-x" + lang, "-", "-v"] +
+                                    sysroot)
     index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN)
     if index1 == -1:
         return []
@@ -302,14 +308,24 @@ def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
         for p in inc_dirs.split("\n")
     ]
 
-def get_cxx_inc_directories(repository_ctx, cc):
+def get_cxx_inc_directories(repository_ctx, cc, tf_sysroot):
     """Compute the list of default C and C++ include directories."""
 
     # For some reason `clang -xc` sometimes returns include paths that are
     # different from the ones from `clang -xc++`. (Symlink and a dir)
     # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
-    includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
-    includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
+    includes_cpp = _get_cxx_inc_directories_impl(
+        repository_ctx,
+        cc,
+        True,
+        tf_sysroot,
+    )
+    includes_c = _get_cxx_inc_directories_impl(
+        repository_ctx,
+        cc,
+        False,
+        tf_sysroot,
+    )
 
     return includes_cpp + [
         inc
@@ -970,6 +986,11 @@ def _flag_enabled(repository_ctx, flag_name):
 def _use_cuda_clang(repository_ctx):
     return _flag_enabled(repository_ctx, "TF_CUDA_CLANG")
 
+def _tf_sysroot(repository_ctx):
+    """Returns the TF_SYSROOT environment value, or an empty string when unset."""
+    environ = repository_ctx.os.environ
+    return environ[_TF_SYSROOT] if _TF_SYSROOT in environ else ""
+
 def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
     capability_flags = [
         "--cuda-gpu-arch=sm_" + cap.replace(".", "")
@@ -1098,6 +1119,7 @@ def _create_local_cuda_repository(repository_ctx):
     )
 
     is_cuda_clang = _use_cuda_clang(repository_ctx)
+    tf_sysroot = _tf_sysroot(repository_ctx)
 
     should_download_clang = is_cuda_clang and _flag_enabled(
         repository_ctx,
@@ -1110,8 +1132,14 @@ def _create_local_cuda_repository(repository_ctx):
     cc = find_cc(repository_ctx)
     cc_fullpath = cc if not should_download_clang else "crosstool/" + cc
 
-    host_compiler_includes = get_cxx_inc_directories(repository_ctx, cc_fullpath)
+    host_compiler_includes = get_cxx_inc_directories(
+        repository_ctx,
+        cc_fullpath,
+        tf_sysroot,
+    )
     cuda_defines = {}
+    cuda_defines["%{builtin_sysroot}"] = tf_sysroot
+    cuda_defines["%{cuda_toolkit_path}"] = cuda_config.config["cuda_toolkit_path"]
 
     host_compiler_prefix = "/usr/bin"
     if _GCC_HOST_COMPILER_PREFIX in repository_ctx.os.environ:
diff --git a/third_party/toolchains/preconfig/generate/BUILD b/third_party/toolchains/preconfig/generate/BUILD
index c2c2e2b5e69..7a4f7456c00 100644
--- a/third_party/toolchains/preconfig/generate/BUILD
+++ b/third_party/toolchains/preconfig/generate/BUILD
@@ -75,16 +75,6 @@ tensorflow_rbe_config(
     tensorrt_version = "5",
 )
 
-tensorflow_rbe_config(
-    name = "ubuntu14.04-py3-clang-cuda10.0-cudnn7-tensorrt5",
-    compiler = "clang",
-    cuda_version = "10.0",
-    cudnn_version = "7",
-    os = "ubuntu14.04",
-    python_version = "3",
-    tensorrt_version = "5",
-)
-
 tensorflow_rbe_config(
     name = "ubuntu16.04-py-gcc7_manylinux2010",
     compiler = "/dt7/usr/bin/gcc",
@@ -113,6 +103,18 @@ tensorflow_rbe_config(
     tensorrt_version = "5.1",
 )
 
+tensorflow_rbe_config(
+    name = "ubuntu16.04-py3-clang_manylinux2010-cuda10.0-cudnn7-tensorrt5.1",
+    compiler = "/clang_r373795/bin/clang",  # Absolute clang path, not the bare name "clang".
+    cuda_version = "10.0",
+    cudnn_version = "7",
+    os = "ubuntu16.04-manylinux2010",
+    python_version = "3",
+    sysroot = "/dt7",  # NOTE(review): presumably exported as TF_SYSROOT by generate.bzl; confirm.
+    tensorrt_install_path = "/usr",
+    tensorrt_version = "5.1",
+)
+
 tensorflow_rbe_config(
     name = "ubuntu16.04-py3_opt-gcc5-rocm",
     compiler = "gcc",
diff --git a/third_party/toolchains/preconfig/generate/generate.bzl b/third_party/toolchains/preconfig/generate/generate.bzl
index 52e6a3c6ceb..66e551d8a53 100644
--- a/third_party/toolchains/preconfig/generate/generate.bzl
+++ b/third_party/toolchains/preconfig/generate/generate.bzl
@@ -3,7 +3,7 @@ load(
     "docker_toolchain_autoconfig",
 )
 
-def _tensorflow_rbe_config(name, compiler, python_version, os, rocm_version = None, cuda_version = None, cudnn_version = None, tensorrt_version = None, tensorrt_install_path = None, cudnn_install_path = None, compiler_prefix = None, build_bazel_src = False):
+def _tensorflow_rbe_config(name, compiler, python_version, os, rocm_version = None, cuda_version = None, cudnn_version = None, tensorrt_version = None, tensorrt_install_path = None, cudnn_install_path = None, compiler_prefix = None, build_bazel_src = False, sysroot = None):
     base = "@%s//image" % os
     config_repos = [
         "local_config_python",
@@ -40,7 +40,7 @@ def _tensorflow_rbe_config(name, compiler, python_version, os, rocm_version = No
         ]
         env.update({
             "TF_NEED_CUDA": "1",
-            "TF_CUDA_CLANG": "1" if compiler == "clang" else "0",
+            "TF_CUDA_CLANG": "1" if compiler.endswith("clang") else "0",
             "TF_CUDA_COMPUTE_CAPABILITIES": "3.0,6.0",
             "TF_ENABLE_XLA": "1",
             "TF_CUDNN_VERSION": cudnn_version,
@@ -49,8 +49,10 @@ def _tensorflow_rbe_config(name, compiler, python_version, os, rocm_version = No
             "TF_NEED_TENSORRT": "1",
             "TF_TENSORRT_VERSION": tensorrt_version,
             "TENSORRT_INSTALL_PATH": tensorrt_install_path if tensorrt_install_path != None else "/usr/lib/x86_64-linux-gnu",
-            "GCC_HOST_COMPILER_PATH": compiler if compiler != "clang" else "",
+            "GCC_HOST_COMPILER_PATH": compiler if not compiler.endswith("clang") else "",
             "GCC_HOST_COMPILER_PREFIX": compiler_prefix if compiler_prefix != None else "/usr/bin",
+            "CLANG_CUDA_COMPILER_PATH": compiler if compiler.endswith("clang") else "",
+            "TF_SYSROOT": sysroot if sysroot else "",
         })
 
     if rocm_version != None:
diff --git a/third_party/toolchains/preconfig/generate/generate.sh b/third_party/toolchains/preconfig/generate/generate.sh
index 03d314fff2b..43ac375715a 100755
--- a/third_party/toolchains/preconfig/generate/generate.sh
+++ b/third_party/toolchains/preconfig/generate/generate.sh
@@ -46,8 +46,8 @@ elif [[ -n "${GPU_VERSION}" ]]; then
   # Currently we create a special toolchain for clang when compiling with
   # cuda enabled. We can get rid of this once the default toolchain bazel
   # provides supports cuda.
-  if [[ "${COMPILER}" == "clang" ]]; then
-    COMPILER="cuda-clang"
+  if [[ "${COMPILER}" == clang* ]]; then
+    COMPILER="${COMPILER}-${GPU_VERSION}"
   fi
 fi