From 6dd6ad9fd7353d945227fd87a93ef1f11eccc75e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 14 Jan 2019 01:32:22 -0800 Subject: [PATCH] Set up remote GPU testing. Currently, we set the tag "local" for GPU tests in order to be able to execute remote CPU and local GPU tests within the same bazel invocation. This change introduces the possibility to set REMOTE_GPU_TESTING to enable GPU tests to also run remotely; given that tags cannot use starlark's select, we use an autoconfig rule that defines a function returning the tags we want: "local" by default and "remote-gpu" if REMOTE_GPU_TESTING is set. The platform is set via exec_compatible_with constraints, so we select on the "remote-gpu" tag to add a constraint that is only fulfilled by GPU-enabled platforms. PiperOrigin-RevId: 229141861 --- tensorflow/compiler/tests/build_defs.bzl | 5 ++- .../platform/default/build_config_root.bzl | 12 +++++- tensorflow/opensource_only.files | 4 ++ tensorflow/tensorflow.bzl | 8 +++- tensorflow/workspace.bzl | 2 + third_party/toolchains/BUILD | 35 +++++++++++++++ third_party/toolchains/remote/BUILD | 0 third_party/toolchains/remote/BUILD.tpl | 0 third_party/toolchains/remote/configure.bzl | 43 +++++++++++++++++++ .../toolchains/remote/execution.bzl.tpl | 2 + 10 files changed, 107 insertions(+), 4 deletions(-) create mode 100644 third_party/toolchains/remote/BUILD create mode 100644 third_party/toolchains/remote/BUILD.tpl create mode 100644 third_party/toolchains/remote/configure.bzl create mode 100644 third_party/toolchains/remote/execution.bzl.tpl diff --git a/tensorflow/compiler/tests/build_defs.bzl b/tensorflow/compiler/tests/build_defs.bzl index be9766c4ef4..ed580f95b6c 100644 --- a/tensorflow/compiler/tests/build_defs.bzl +++ b/tensorflow/compiler/tests/build_defs.bzl @@ -5,6 +5,7 @@ load("//tensorflow/compiler/tests:plugin.bzl", "plugins") load( "//tensorflow/core:platform/default/build_config_root.bzl", "tf_cuda_tests_tags", + "tf_exec_compatible_with", ) 
def all_backends(): @@ -84,6 +85,7 @@ def tf_xla_py_test( else: fail("Unknown backend {}".format(backend)) + test_tags = tags + backend_tags native.py_test( name = test_name, srcs = srcs, @@ -92,7 +94,8 @@ def tf_xla_py_test( main = "{}.py".format(name) if main == None else main, data = data + backend_data, deps = deps + backend_deps, - tags = tags + backend_tags, + tags = test_tags, + exec_compatible_with = tf_exec_compatible_with({"tags": test_tags}), **kwargs ) test_names.append(test_name) diff --git a/tensorflow/core/platform/default/build_config_root.bzl b/tensorflow/core/platform/default/build_config_root.bzl index 37475feebe2..ab05b25d682 100644 --- a/tensorflow/core/platform/default/build_config_root.bzl +++ b/tensorflow/core/platform/default/build_config_root.bzl @@ -2,11 +2,19 @@ # The functions in this file might be referred by tensorflow.bzl. They have to # be separate to avoid cyclic references. +load("@local_config_remote_execution//:remote_execution.bzl", "gpu_test_tags") + def tf_cuda_tests_tags(): - return ["requires-gpu", "local", "gpu"] + return ["requires-gpu", "gpu"] + gpu_test_tags() def tf_sycl_tests_tags(): - return ["requires-gpu", "local", "gpu"] + return ["requires-gpu", "gpu"] + gpu_test_tags() + +def tf_exec_compatible_with(kwargs): + if ("tags" in kwargs and kwargs["tags"] != None and + "remote-gpu" in kwargs["tags"]): + return ["@org_tensorflow//third_party/toolchains:gpu_test"] + return [] def tf_additional_plugin_deps(): return select({ diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index bade64dcf82..03b5e5a73cd 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -71,6 +71,10 @@ tensorflow/third_party/toolchains/cpus/arm/CROSSTOOL.tpl tensorflow/third_party/toolchains/cpus/arm/BUILD tensorflow/third_party/toolchains/cpus/py3/BUILD tensorflow/third_party/toolchains/cpus/py/BUILD +tensorflow/third_party/toolchains/remote/configure.bzl 
+tensorflow/third_party/toolchains/remote/BUILD.tpl +tensorflow/third_party/toolchains/remote/BUILD +tensorflow/third_party/toolchains/remote/execution.bzl.tpl tensorflow/third_party/toolchains/BUILD tensorflow/third_party/gpus/BUILD tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 1024b686ebd..b7da5bf9135 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -9,6 +9,7 @@ load( "tf_additional_grpc_deps_py", "tf_additional_xla_deps_py", "tf_cuda_tests_tags", + "tf_exec_compatible_with", "tf_sycl_tests_tags", ) load( @@ -785,6 +786,7 @@ def tf_cc_test( ], ), data = data + tf_binary_dynamic_kernel_dsos(kernels), + exec_compatible_with = tf_exec_compatible_with(kwargs), # Nested select() statements seem not to be supported when passed to # linkstatic, and we already have a cuda select() passed in to this # function. @@ -897,6 +899,7 @@ def tf_cuda_only_cc_test( args = [], kernels = [], linkopts = []): + tags = tags + tf_cuda_tests_tags() native.cc_test( name = "%s%s" % (name, "_gpu"), srcs = srcs + tf_binary_additional_srcs(), @@ -919,7 +922,8 @@ def tf_cuda_only_cc_test( clean_dep("//tensorflow:darwin"): 1, "//conditions:default": 0, }), - tags = tags + tf_cuda_tests_tags(), + tags = tags, + exec_compatible_with = tf_exec_compatible_with({"tags": tags}), ) register_extension_info( @@ -983,6 +987,7 @@ def tf_cc_test_mkl( }) + _rpath_linkopts(src_to_test_name(src)), deps = deps + tf_binary_dynamic_kernel_deps(kernels) + mkl_deps(), data = data + tf_binary_dynamic_kernel_dsos(kernels), + exec_compatible_with = tf_exec_compatible_with({"tags": tags}), linkstatic = linkstatic, tags = tags, size = size, @@ -1752,6 +1757,7 @@ def py_test(deps = [], data = [], kernels = [], **kwargs): "//conditions:default": [], clean_dep("//tensorflow:no_tensorflow_py_deps"): ["//tensorflow/tools/pip_package:win_pip_package_marker"], }) + 
tf_binary_dynamic_kernel_dsos(kernels), + exec_compatible_with = tf_exec_compatible_with(kwargs), **kwargs ) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index bea57b8ed72..46c1261be21 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -10,6 +10,7 @@ load("//third_party/py:python_configure.bzl", "python_configure") load("//third_party/sycl:sycl_configure.bzl", "sycl_configure") load("//third_party/systemlibs:syslibs_configure.bzl", "syslibs_configure") +load("//third_party/toolchains/remote:configure.bzl", "remote_execution_configure") load("//third_party/toolchains/clang6:repo.bzl", "clang6_configure") load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compiler_configure") load("//third_party:repo.bzl", "tf_http_archive") @@ -64,6 +65,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): syslibs_configure(name = "local_config_syslibs") python_configure(name = "local_config_python") rocm_configure(name = "local_config_rocm") + remote_execution_configure(name = "local_config_remote_execution") initialize_third_party() diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD index 6ed6e5c3679..509b5414003 100644 --- a/third_party/toolchains/BUILD +++ b/third_party/toolchains/BUILD @@ -4,6 +4,17 @@ package(default_visibility = ["//visibility:public"]) load("//third_party/toolchains/preconfig/generate:containers.bzl", "container_digests") +# Constraint used for platforms below so we can force certain rules to be executed +# on specific platforms. +constraint_setting(name = "custom_platforms") + +# Constraint for platforms that allow GPU testing (i.e. have a GPU available). +# This is used in exec_compatible_with of rules that need GPU access. +constraint_value( + name = "gpu_test", + constraint_setting = ":custom_platforms", +) + # TODO(b/122347293): This is the RBE config based on the CPU configuration / image provided # in the asci-toolchain setup. 
Delete this once we switched CPU remote builds to the # new platform below. @@ -68,3 +79,27 @@ platform( value:"docker://gcr.io/tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu14.04@%s" }""" % container_digests["cuda10.0-cudnn7-ubuntu14.04"], ) + +# The above platform with GPU support. +platform( + name = "rbe_cuda10.0-cudnn7-ubuntu14.04-gpu", + constraint_values = [ + "@bazel_tools//platforms:x86_64", + "@bazel_tools//platforms:linux", + ":gpu_test", + ], + remote_execution_properties = """ + properties: { + name: "container-image" + value: "docker://gcr.io/tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu14.04@%s" + } + properties: { + name: "dockerRuntime" + value: "nvidia" + } + properties: { + name: "Pool" + value: "gpu-pool" + } + """ % container_digests["cuda10.0-cudnn7-ubuntu14.04"], +) diff --git a/third_party/toolchains/remote/BUILD b/third_party/toolchains/remote/BUILD new file mode 100644 index 00000000000..e69de29bb2d diff --git a/third_party/toolchains/remote/BUILD.tpl b/third_party/toolchains/remote/BUILD.tpl new file mode 100644 index 00000000000..e69de29bb2d diff --git a/third_party/toolchains/remote/configure.bzl b/third_party/toolchains/remote/configure.bzl new file mode 100644 index 00000000000..cc5b9842648 --- /dev/null +++ b/third_party/toolchains/remote/configure.bzl @@ -0,0 +1,43 @@ +"""Repository rule for remote GPU autoconfiguration. + +This rule creates the Starlark file `remote_execution.bzl` at the root of the +generated repository (loaded as `@local_config_remote_execution` in this +change), providing the function `gpu_test_tags`. + +`gpu_test_tags` will return: + + * `local`: if `REMOTE_GPU_TESTING` is unset or not `1`, allowing CPU tests + to run remotely and GPU tests to run locally in the same bazel invocation. + * `remote-gpu`: if `REMOTE_GPU_TESTING` is `1`; this allows rules to + set an execution requirement that enables a GPU-enabled remote platform.
+""" + +_REMOTE_GPU_TESTING = "REMOTE_GPU_TESTING" + +def _flag_enabled(repository_ctx, flag_name): + if flag_name not in repository_ctx.os.environ: + return False + return repository_ctx.os.environ[flag_name].strip() == "1" + +def _remote_execution_configure(repository_ctx): + # If we do not support remote gpu test execution, mark them as local, so we + # can combine remote builds with local gpu tests. + gpu_test_tags = "\"local\"" + if _flag_enabled(repository_ctx, _REMOTE_GPU_TESTING): + gpu_test_tags = "\"remote-gpu\"" + repository_ctx.template( + "remote_execution.bzl", + Label("//third_party/toolchains/remote:execution.bzl.tpl"), + { + "%{gpu_test_tags}": gpu_test_tags, + }, + ) + repository_ctx.template( + "BUILD", + Label("//third_party/toolchains/remote:BUILD.tpl"), + ) + +remote_execution_configure = repository_rule( + implementation = _remote_execution_configure, + environ = [_REMOTE_GPU_TESTING], +) diff --git a/third_party/toolchains/remote/execution.bzl.tpl b/third_party/toolchains/remote/execution.bzl.tpl new file mode 100644 index 00000000000..18858cc0dc0 --- /dev/null +++ b/third_party/toolchains/remote/execution.bzl.tpl @@ -0,0 +1,2 @@ +def gpu_test_tags(): + return [%{gpu_test_tags}]