From 09fe958feebec0405ccac225c94fc130304fc2f4 Mon Sep 17 00:00:00 2001 From: Jakob Buchgraber Date: Wed, 19 Feb 2020 14:59:49 -0800 Subject: [PATCH] Enable Remote Config for ROCM and CUDA RBE pre- and postsubmits Previously TF_CUDA_CONFIG_REPO would point to a pregenerated and checked-in configuration. This change has it point instead to a remote repository that generates the configuration during the build for the specific docker image. All supported configurations can be found in third_party/toolchains/remote_config/configs.bzl. Each tensorflow_rbe_config() macro creates a few remote repositories to which to point the TF_*_CONFIG_REPO environment variables. The remote repository names are prefixed with the macro's name. For example, tensorflow_rbe_config(name = "ubuntu") will create @ubuntu_config_python, @ubuntu_config_cuda, @ubuntu_config_nccl, etc. This change also introduces the remote_platform_configure rule. All this rule does is create a remote repository with a single platform target for the tensorflow_rbe_config(). This will make the platforms defined in //third_party/toolchains/BUILD obsolete once remote config is fully rolled out. 
PiperOrigin-RevId: 296065144 Change-Id: Ia54beeb771b28846444e27a2023f70abbd9f6ad5 --- .bazelrc | 4 + tensorflow/opensource_only.files | 6 + .../ubuntu_16/gpu_py36_full/build.sh | 24 +++- tensorflow/workspace.bzl | 4 + third_party/gpus/cuda_configure.bzl | 69 +++++++--- third_party/gpus/rocm_configure.bzl | 48 +++++-- third_party/nccl/nccl_configure.bzl | 29 ++-- third_party/py/python_configure.bzl | 22 ++- third_party/remote_config/BUILD.tpl | 11 ++ .../remote_platform_configure.bzl | 17 +++ third_party/tensorrt/tensorrt_configure.bzl | 24 +++- third_party/toolchains/remote_config/BUILD | 0 .../toolchains/remote_config/configs.bzl | 24 ++++ .../toolchains/remote_config/containers.bzl | 20 +++ .../toolchains/remote_config/rbe_config.bzl | 125 ++++++++++++++++++ 15 files changed, 365 insertions(+), 62 deletions(-) create mode 100644 third_party/remote_config/BUILD.tpl create mode 100644 third_party/remote_config/remote_platform_configure.bzl create mode 100644 third_party/toolchains/remote_config/BUILD create mode 100644 third_party/toolchains/remote_config/configs.bzl create mode 100644 third_party/toolchains/remote_config/containers.bzl create mode 100644 third_party/toolchains/remote_config/rbe_config.bzl diff --git a/.bazelrc b/.bazelrc index 5f9173b9d36..2b80063fd59 100644 --- a/.bazelrc +++ b/.bazelrc @@ -319,6 +319,10 @@ build:xla --define=with_xla_support=true # BEGIN TF REMOTE BUILD EXECUTION OPTIONS # Options when using remote execution # WARNING: THESE OPTIONS WONT WORK IF YOU DO NOT HAVE PROPER AUTHENTICATION AND PERMISSIONS + +# Flag to enable remote config +common --experimental_repo_remote_exec + build:rbe --action_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1 build:rbe --google_default_credentials build:rbe --bes_backend=buildeventservice.googleapis.com diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index 4d39efad106..026f2675737 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -149,7 
+149,9 @@ tensorflow/third_party/py/python_configure.bzl tensorflow/third_party/pybind11.BUILD tensorflow/third_party/python_runtime/BUILD tensorflow/third_party/remote_config/BUILD +tensorflow/third_party/remote_config/BUILD.tpl tensorflow/third_party/remote_config/common.bzl +tensorflow/third_party/remote_config/remote_platform_configure.bzl tensorflow/third_party/repo.bzl tensorflow/third_party/six.BUILD tensorflow/third_party/snappy.BUILD @@ -280,6 +282,10 @@ tensorflow/third_party/toolchains/remote/BUILD tensorflow/third_party/toolchains/remote/BUILD.tpl tensorflow/third_party/toolchains/remote/configure.bzl tensorflow/third_party/toolchains/remote/execution.bzl.tpl +tensorflow/third_party/toolchains/remote_config/BUILD +tensorflow/third_party/toolchains/remote_config/configs.bzl +tensorflow/third_party/toolchains/remote_config/containers.bzl +tensorflow/third_party/toolchains/remote_config/rbe_config.bzl tensorflow/third_party/wrapt.BUILD tensorflow/third_party/zlib.BUILD tensorflow/tools/ci_build/release/common.sh diff --git a/tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh b/tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh index 935db96add1..1498063630a 100644 --- a/tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh +++ b/tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh @@ -50,6 +50,13 @@ function run_build () { # Get the default test targets for bazel. source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + RBE_CONFIG="@ubuntu16.04-py3-gcc7_manylinux2010-cuda10.0-cudnn7-tensorrt5.1" + TF_CUDA_CONFIG_REPO="${RBE_CONFIG}_config_cuda" + TF_TENSORRT_CONFIG_REPO="${RBE_CONFIG}_config_tensorrt" + TF_PYTHON_CONFIG_REPO="${RBE_CONFIG}_config_python" + TF_NCCL_CONFIG_REPO="${RBE_CONFIG}_config_nccl" + TF_RBE_PLATFORM="${RBE_CONFIG}_config_platform//:platform" + # Run bazel test command. Double test timeouts to avoid flakes. 
# //tensorflow/core/platform:setround_test is not supported. See b/64264700 # TODO(klimek): Re-enable tensorrt tests (with different runtime image) once @@ -65,12 +72,14 @@ function run_build () { --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ --action_env=REMOTE_GPU_TESTING=1 \ --action_env=TF_CUDA_COMPUTE_CAPABILITIES="${TF_CUDA_COMPUTE_CAPABILITIES}" \ - --action_env=TF_CUDA_CONFIG_REPO=@org_tensorflow//third_party/toolchains/preconfig/ubuntu16.04/cuda10.0-cudnn7 \ + --action_env=TF_CUDA_CONFIG_REPO="${TF_CUDA_CONFIG_REPO}" \ --action_env=TF_CUDA_VERSION=10 \ --action_env=TF_CUDNN_VERSION=7 \ --action_env=TF_NEED_TENSORRT=0 \ + --action_env=TF_TENSORRT_CONFIG_REPO="${TF_TENSORRT_CONFIG_REPO}" \ --action_env=TF_NEED_CUDA=1 \ - --action_env=TF_PYTHON_CONFIG_REPO=@org_tensorflow//third_party/toolchains/preconfig/ubuntu16.04/py3 \ + --action_env=TF_PYTHON_CONFIG_REPO="${TF_PYTHON_CONFIG_REPO}" \ + --action_env=TF_NCCL_CONFIG_REPO="${TF_NCCL_CONFIG_REPO}" \ --test_env=LD_LIBRARY_PATH \ --test_tag_filters="${tag_filters}" \ --build_tag_filters="${tag_filters}" \ @@ -89,17 +98,17 @@ function run_build () { --linkopt=-lm \ --distinct_host_configuration=false \ --remote_default_exec_properties=build=${CACHE_SILO_VAL} \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.0:toolchain \ + --crosstool_top="${TF_CUDA_CONFIG_REPO}//crosstool:toolchain" \ --host_javabase=@bazel_toolchains//configs/ubuntu16_04_clang/1.1:jdk8 \ --javabase=@bazel_toolchains//configs/ubuntu16_04_clang/1.0:jdk8 \ --host_java_toolchain=@bazel_tools//tools/jdk:toolchain_hostjdk8 \ --java_toolchain=@bazel_tools//tools/jdk:toolchain_hostjdk8 \ - --extra_toolchains=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.0:toolchain-linux-x86_64 \ - --extra_execution_platforms=@org_tensorflow//third_party/toolchains:rbe_cuda10.0-cudnn7-ubuntu16.04-manylinux2010 \ - 
--host_platform=@org_tensorflow//third_party/toolchains:rbe_cuda10.0-cudnn7-ubuntu16.04-manylinux2010 \ + --extra_toolchains="${TF_CUDA_CONFIG_REPO}//crosstool:toolchain-linux-x86_64" \ + --extra_execution_platforms="${TF_RBE_PLATFORM}" \ + --host_platform="${TF_RBE_PLATFORM}" \ --local_test_jobs=4 \ --remote_timeout=3600 \ - --platforms=@org_tensorflow//third_party/toolchains:rbe_cuda10.0-cudnn7-ubuntu16.04-manylinux2010 \ + --platforms="${TF_RBE_PLATFORM}" \ -- \ ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... @@ -113,3 +122,4 @@ install_bazelisk which bazel run_build + diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index c7160a9ffbd..95a9afa9d5a 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -41,6 +41,7 @@ load("//third_party/psimd:workspace.bzl", psimd = "repo") load("//third_party/pthreadpool:workspace.bzl", pthreadpool = "repo") load("//third_party/sobol_data:workspace.bzl", sobol_data = "repo") load("//third_party/vulkan_headers:workspace.bzl", vulkan_headers = "repo") +load("//third_party/toolchains/remote_config:configs.bzl", "initialize_rbe_configs") def initialize_third_party(): """ Load third party repositories. See above load() statements. """ @@ -81,6 +82,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): def tf_repositories(path_prefix = "", tf_repo_name = ""): """All external dependencies for TF builds.""" + # Loads all external repos to configure RBE builds. + initialize_rbe_configs() + # Note that we check the minimum bazel version in WORKSPACE. 
clang6_configure(name = "local_config_clang6") cc_download_clang_toolchain(name = "local_config_download_clang") diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index c28cbbac2ea..bdaaa4ab250 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -1174,6 +1174,24 @@ def _create_remote_cuda_repository(repository_ctx, remote_config_repo): {}, ) + repository_ctx.template( + "crosstool/BUILD", + config_repo_label(remote_config_repo, "crosstool:BUILD"), + {}, + ) + + repository_ctx.template( + "crosstool/cc_toolchain_config.bzl", + config_repo_label(remote_config_repo, "crosstool:cc_toolchain_config.bzl"), + {}, + ) + + repository_ctx.template( + "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", + config_repo_label(remote_config_repo, "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc"), + {}, + ) + def _cuda_autoconf_impl(repository_ctx): """Implementation of the cuda_autoconf repository rule.""" if not enable_cuda(repository_ctx): @@ -1191,29 +1209,38 @@ def _cuda_autoconf_impl(repository_ctx): else: _create_local_cuda_repository(repository_ctx) -cuda_configure = repository_rule( - implementation = _cuda_autoconf_impl, - environ = [ - _GCC_HOST_COMPILER_PATH, - _GCC_HOST_COMPILER_PREFIX, - _CLANG_CUDA_COMPILER_PATH, - "TF_NEED_CUDA", - "TF_CUDA_CLANG", - _TF_DOWNLOAD_CLANG, - _CUDA_TOOLKIT_PATH, - _CUDNN_INSTALL_PATH, - _TF_CUDA_VERSION, - _TF_CUDNN_VERSION, - _TF_CUDA_COMPUTE_CAPABILITIES, - _TF_CUDA_CONFIG_REPO, - "NVVMIR_LIBRARY_DIR", - _PYTHON_BIN_PATH, - "TMP", - "TMPDIR", - "TF_CUDA_PATHS", - ], +_ENVIRONS = [ + _GCC_HOST_COMPILER_PATH, + _GCC_HOST_COMPILER_PREFIX, + _CLANG_CUDA_COMPILER_PATH, + "TF_NEED_CUDA", + "TF_CUDA_CLANG", + _TF_DOWNLOAD_CLANG, + _CUDA_TOOLKIT_PATH, + _CUDNN_INSTALL_PATH, + _TF_CUDA_VERSION, + _TF_CUDNN_VERSION, + _TF_CUDA_COMPUTE_CAPABILITIES, + "NVVMIR_LIBRARY_DIR", + _PYTHON_BIN_PATH, + "TMP", + "TMPDIR", + "TF_CUDA_PATHS", +] + 
+remote_cuda_configure = repository_rule( + implementation = _create_local_cuda_repository, + environ = _ENVIRONS, + remotable = True, + attrs = { + "environ": attr.string_dict(), + }, ) +cuda_configure = repository_rule( + implementation = _cuda_autoconf_impl, + environ = _ENVIRONS + [_TF_CUDA_CONFIG_REPO], +) """Detects and configures the local CUDA toolchain. Add the following to your WORKSPACE FILE: diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index e26e9b485b1..20ff2a4aafa 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -811,6 +811,21 @@ def _create_remote_rocm_repository(repository_ctx, remote_config_repo): config_repo_label(remote_config_repo, "rocm:rocm/rocm_config.h"), {}, ) + repository_ctx.template( + "crosstool/BUILD", + config_repo_label(remote_config_repo, "crosstool:BUILD"), + {}, + ) + repository_ctx.template( + "crosstool/cc_toolchain_config.bzl", + config_repo_label(remote_config_repo, "crosstool:cc_toolchain_config.bzl"), + {}, + ) + repository_ctx.template( + "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", + config_repo_label(remote_config_repo, "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc"), + {}, + ) def _rocm_autoconf_impl(repository_ctx): """Implementation of the rocm_autoconf repository rule.""" @@ -824,20 +839,29 @@ def _rocm_autoconf_impl(repository_ctx): else: _create_local_rocm_repository(repository_ctx) -rocm_configure = repository_rule( - implementation = _rocm_autoconf_impl, - environ = [ - _GCC_HOST_COMPILER_PATH, - _GCC_HOST_COMPILER_PREFIX, - "TF_NEED_ROCM", - _ROCM_TOOLKIT_PATH, - _TF_ROCM_VERSION, - _TF_MIOPEN_VERSION, - _TF_ROCM_AMDGPU_TARGETS, - _TF_ROCM_CONFIG_REPO, - ], +_ENVIRONS = [ + _GCC_HOST_COMPILER_PATH, + _GCC_HOST_COMPILER_PREFIX, + "TF_NEED_ROCM", + _ROCM_TOOLKIT_PATH, + _TF_ROCM_VERSION, + _TF_MIOPEN_VERSION, + _TF_ROCM_AMDGPU_TARGETS, +] + +remote_rocm_configure = repository_rule( + implementation = 
_create_local_rocm_repository, + environ = _ENVIRONS, + remotable = True, + attrs = { + "environ": attr.string_dict(), + }, ) +rocm_configure = repository_rule( + implementation = _rocm_autoconf_impl, + environ = _ENVIRONS + [_TF_ROCM_CONFIG_REPO], +) """Detects and configures the local ROCm toolchain. Add the following to your WORKSPACE FILE: diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index f05ef7e7a6e..92acb204097 100644 --- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -139,17 +139,28 @@ def _nccl_autoconf_impl(repository_ctx): else: _create_local_nccl_repository(repository_ctx) +_ENVIRONS = [ + _CUDA_TOOLKIT_PATH, + _NCCL_HDR_PATH, + _NCCL_INSTALL_PATH, + _TF_NCCL_VERSION, + _TF_CUDA_COMPUTE_CAPABILITIES, + _TF_NEED_CUDA, + "TF_CUDA_PATHS", +] + +remote_nccl_configure = repository_rule( + implementation = _create_local_nccl_repository, + environ = _ENVIRONS, + remotable = True, + attrs = { + "environ": attr.string_dict(), + }, +) + nccl_configure = repository_rule( implementation = _nccl_autoconf_impl, - environ = [ - _CUDA_TOOLKIT_PATH, - _NCCL_HDR_PATH, - _NCCL_INSTALL_PATH, - _TF_NCCL_VERSION, - _TF_CUDA_COMPUTE_CAPABILITIES, - _TF_NEED_CUDA, - "TF_CUDA_PATHS", - ], + environ = _ENVIRONS, ) """Detects and configures the NCCL configuration. 
diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl index a82839c556c..6e9a22f8063 100644 --- a/third_party/py/python_configure.bzl +++ b/third_party/py/python_configure.bzl @@ -262,14 +262,24 @@ def _python_autoconf_impl(repository_ctx): else: _create_local_python_repository(repository_ctx) +_ENVIRONS = [ + BAZEL_SH, + PYTHON_BIN_PATH, + PYTHON_LIB_PATH, +] + +remote_python_configure = repository_rule( + implementation = _create_local_python_repository, + environ = _ENVIRONS, + remotable = True, + attrs = { + "environ": attr.string_dict(), + }, +) + python_configure = repository_rule( implementation = _python_autoconf_impl, - environ = [ - BAZEL_SH, - PYTHON_BIN_PATH, - PYTHON_LIB_PATH, - TF_PYTHON_CONFIG_REPO, - ], + environ = _ENVIRONS + [TF_PYTHON_CONFIG_REPO], ) """Detects and configures the local Python. diff --git a/third_party/remote_config/BUILD.tpl b/third_party/remote_config/BUILD.tpl new file mode 100644 index 00000000000..76f360f3e72 --- /dev/null +++ b/third_party/remote_config/BUILD.tpl @@ -0,0 +1,11 @@ +platform( + name = "platform", + constraint_values = [ + "@bazel_tools//platforms:x86_64", + "@bazel_tools//platforms:linux", + ], + exec_properties = { + "container-image": "%{container_image}", + "Pool": "default", + }, +) diff --git a/third_party/remote_config/remote_platform_configure.bzl b/third_party/remote_config/remote_platform_configure.bzl new file mode 100644 index 00000000000..175649da643 --- /dev/null +++ b/third_party/remote_config/remote_platform_configure.bzl @@ -0,0 +1,17 @@ +"""Repository rule to create a platform for a docker image to be used with RBE.""" + +def _remote_platform_configure_impl(repository_ctx): + repository_ctx.template( + "BUILD", + Label("@org_tensorflow//third_party/remote_config:BUILD.tpl"), + { + "%{container_image}": repository_ctx.attr.container_image, + }, + ) + +remote_platform_configure = repository_rule( + implementation = _remote_platform_configure_impl, + attrs = { + 
"container_image": attr.string(mandatory = True), + }, +) diff --git a/third_party/tensorrt/tensorrt_configure.bzl b/third_party/tensorrt/tensorrt_configure.bzl index f08ded2fee4..6bd71049248 100644 --- a/third_party/tensorrt/tensorrt_configure.bzl +++ b/third_party/tensorrt/tensorrt_configure.bzl @@ -178,15 +178,25 @@ def _tensorrt_configure_impl(repository_ctx): _create_local_tensorrt_repository(repository_ctx) +_ENVIRONS = [ + _TENSORRT_INSTALL_PATH, + _TF_TENSORRT_VERSION, + _TF_NEED_TENSORRT, + "TF_CUDA_PATHS", +] + +remote_tensorrt_configure = repository_rule( + implementation = _create_local_tensorrt_repository, + environ = _ENVIRONS, + remotable = True, + attrs = { + "environ": attr.string_dict(), + }, +) + tensorrt_configure = repository_rule( implementation = _tensorrt_configure_impl, - environ = [ - _TENSORRT_INSTALL_PATH, - _TF_TENSORRT_VERSION, - _TF_TENSORRT_CONFIG_REPO, - _TF_NEED_TENSORRT, - "TF_CUDA_PATHS", - ], + environ = _ENVIRONS + [_TF_TENSORRT_CONFIG_REPO], ) """Detects and configures the local CUDA toolchain. 
diff --git a/third_party/toolchains/remote_config/BUILD b/third_party/toolchains/remote_config/BUILD new file mode 100644 index 00000000000..e69de29bb2d diff --git a/third_party/toolchains/remote_config/configs.bzl b/third_party/toolchains/remote_config/configs.bzl new file mode 100644 index 00000000000..2c2bcfb59b3 --- /dev/null +++ b/third_party/toolchains/remote_config/configs.bzl @@ -0,0 +1,24 @@ +"""Configurations of RBE builds used with remote config.""" + +load("//third_party/toolchains/remote_config:rbe_config.bzl", "tensorflow_rbe_config") + +def initialize_rbe_configs(): + tensorflow_rbe_config( + name = "ubuntu16.04-py3-gcc7_manylinux2010-cuda10.0-cudnn7-tensorrt5.1", + compiler = "/dt7/usr/bin/gcc", + compiler_prefix = "/usr/bin", + cuda_version = "10.0", + cudnn_version = "7", + os = "ubuntu16.04-manylinux2010", + python_version = "3", + tensorrt_install_path = "/usr", + tensorrt_version = "5.1", + ) + + tensorflow_rbe_config( + name = "ubuntu16.04-py3_opt-gcc5-rocm", + compiler = "gcc", + os = "ubuntu16.04", + python_version = "3", + rocm_version = "2.5", # Any version will do. + ) diff --git a/third_party/toolchains/remote_config/containers.bzl b/third_party/toolchains/remote_config/containers.bzl new file mode 100644 index 00000000000..8813da19e00 --- /dev/null +++ b/third_party/toolchains/remote_config/containers.bzl @@ -0,0 +1,20 @@ +"""Docker images used with remote config and RBE.""" + +load("//third_party/toolchains/preconfig/generate:containers.bzl", "container_digests") + +containers = { + + # Built with //tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.0-cudnn7-ubuntu16.04-manylinux2010. 
+ "cuda10.0-cudnn7-ubuntu16.04-manylinux2010": { + "registry": "gcr.io", + "repository": "tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu16.04-manylinux2010", + "digest": container_digests["cuda10.0-cudnn7-ubuntu16.04-manylinux2010"], + }, + + # Built with //tensorflow/tools/ci_build/Dockerfile.rbe.rocm-ubuntu16.04 + "rocm-ubuntu16.04": { + "registry": "gcr.io", + "repository": "tensorflow-testing/nosla-rocm-ubuntu16.04", + "digest": container_digests["rocm-ubuntu16.04"], + }, +} diff --git a/third_party/toolchains/remote_config/rbe_config.bzl b/third_party/toolchains/remote_config/rbe_config.bzl new file mode 100644 index 00000000000..ca186f094a7 --- /dev/null +++ b/third_party/toolchains/remote_config/rbe_config.bzl @@ -0,0 +1,125 @@ +"""Macro that creates external repositories for remote config.""" + +load("//third_party/py:python_configure.bzl", "remote_python_configure") +load("//third_party/gpus:cuda_configure.bzl", "remote_cuda_configure") +load("//third_party/nccl:nccl_configure.bzl", "remote_nccl_configure") +load("//third_party/gpus:rocm_configure.bzl", "remote_rocm_configure") +load("//third_party/tensorrt:tensorrt_configure.bzl", "remote_tensorrt_configure") +load("//third_party/toolchains/remote_config:containers.bzl", "containers") +load("//third_party/remote_config:remote_platform_configure.bzl", "remote_platform_configure") + +def _container_image_uri(container_name): + container = containers[container_name] + return "docker://%s/%s@%s" % (container["registry"], container["repository"], container["digest"]) + +def _tensorflow_rbe_config(name, compiler, python_version, os, rocm_version = None, cuda_version = None, cudnn_version = None, tensorrt_version = None, tensorrt_install_path = None, cudnn_install_path = None, compiler_prefix = None, sysroot = None): + if cuda_version == None and rocm_version == None: + fail("Neither cuda_version nor rocm_version specified. 
You need to specify exactly one.") + + if cuda_version != None and rocm_version != None: + fail("Specifying both cuda_version and rocm_version is not supported.") + + env = { + "ABI_VERSION": "gcc", + "ABI_LIBC_VERSION": "glibc_2.19", + "BAZEL_COMPILER": compiler, + "BAZEL_HOST_SYSTEM": "i686-unknown-linux-gnu", + "BAZEL_TARGET_LIBC": "glibc_2.19", + "BAZEL_TARGET_CPU": "k8", + "BAZEL_TARGET_SYSTEM": "x86_64-unknown-linux-gnu", + "CC_TOOLCHAIN_NAME": "linux_gnu_x86", + "CC": compiler, + "PYTHON_BIN_PATH": "/usr/bin/python%s" % python_version, + "CLEAR_CACHE": "1", + "HOST_CXX_COMPILER": compiler, + "HOST_C_COMPILER": compiler, + } + + if cuda_version != None: + # The cuda toolchain currently contains its own C++ toolchain definition, + # so we do not fetch local_config_cc. + env.update({ + "TF_NEED_CUDA": "1", + "TF_CUDA_CLANG": "1" if compiler.endswith("clang") else "0", + "TF_CUDA_COMPUTE_CAPABILITIES": "3.0,6.0", + "TF_ENABLE_XLA": "1", + "TF_CUDNN_VERSION": cudnn_version, + "TF_CUDA_VERSION": cuda_version, + "CUDNN_INSTALL_PATH": cudnn_install_path if cudnn_install_path != None else "/usr/lib/x86_64-linux-gnu", + "TF_NEED_TENSORRT": "1", + "TF_TENSORRT_VERSION": tensorrt_version, + "TENSORRT_INSTALL_PATH": tensorrt_install_path if tensorrt_install_path != None else "/usr/lib/x86_64-linux-gnu", + "GCC_HOST_COMPILER_PATH": compiler if not compiler.endswith("clang") else "", + "GCC_HOST_COMPILER_PREFIX": compiler_prefix if compiler_prefix != None else "/usr/bin", + "CLANG_CUDA_COMPILER_PATH": compiler if compiler.endswith("clang") else "", + "TF_SYSROOT": sysroot if sysroot else "", + }) + + container_name = "cuda%s-cudnn%s-%s" % (cuda_version, cudnn_version, os) + container_image = _container_image_uri(container_name) + exec_properties = { + "container-image": container_image, + "Pool": "default", + } + + remote_platform_configure( + name = "%s_config_platform" % name, + container_image = container_image, + ) + + remote_python_configure( + name = 
"%s_config_python" % name, + environ = env, + exec_properties = exec_properties, + ) + + remote_cuda_configure( + name = "%s_config_cuda" % name, + environ = env, + exec_properties = exec_properties, + ) + + remote_nccl_configure( + name = "%s_config_nccl" % name, + environ = env, + exec_properties = exec_properties, + ) + + remote_tensorrt_configure( + name = "%s_config_tensorrt" % name, + environ = env, + exec_properties = exec_properties, + ) + elif rocm_version != None: + # The rocm toolchain currently contains its own C++ toolchain definition, + # so we do not fetch local_config_cc. + env.update({ + "TF_NEED_ROCM": "1", + "TF_ENABLE_XLA": "0", + }) + + container_name = "rocm-%s" % (os) + container_image = _container_image_uri(container_name) + exec_properties = { + "container-image": container_image, + "Pool": "default", + } + + remote_platform_configure( + name = "%s_config_platform" % name, + container_image = container_image, + ) + + remote_python_configure( + name = "%s_config_python" % name, + environ = env, + exec_properties = exec_properties, + ) + + remote_rocm_configure( + name = "%s_config_rocm" % name, + environ = env, + exec_properties = exec_properties, + ) + +tensorflow_rbe_config = _tensorflow_rbe_config