Set up remote GPU testing.
Currently, GPU tests are tagged "local" so that remote CPU tests and local GPU tests can be executed within the same Bazel invocation. This change adds the REMOTE_GPU_TESTING setting, which allows GPU tests to run remotely as well. Since tags cannot use Starlark's select(), an autoconfig repository rule generates a function that returns the tags we want: "local" by default, and "remote-gpu" when REMOTE_GPU_TESTING is set. The execution platform is chosen via exec_compatible_with constraints, so when the "remote-gpu" tag is present we add a constraint that is only fulfilled by GPU-enabled platforms.
PiperOrigin-RevId: 229141861
Parent: cfbf81335a
Commit: 6dd6ad9fd7
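
How the pieces fit together, in brief: the new repository rule reads the REMOTE_GPU_TESTING environment variable while the workspace is configured and generates @local_config_remote_execution//:remote_execution.bzl from a template. A sketch of the generated file (illustrative; only the template at the end of this diff is part of the change), assuming REMOTE_GPU_TESTING=1 was set in the environment when Bazel evaluated the rule:

    # Generated @local_config_remote_execution//:remote_execution.bzl (sketch)
    def gpu_test_tags():
        return ["remote-gpu"]

    # With REMOTE_GPU_TESTING unset (or any value other than "1"), the generated
    # function returns ["local"] instead, preserving today's behaviour.
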
@@ -5,6 +5,7 @@ load("//tensorflow/compiler/tests:plugin.bzl", "plugins")
 load(
     "//tensorflow/core:platform/default/build_config_root.bzl",
     "tf_cuda_tests_tags",
+    "tf_exec_compatible_with",
 )
 
 def all_backends():
@@ -84,6 +85,7 @@ def tf_xla_py_test(
         else:
             fail("Unknown backend {}".format(backend))
 
+        test_tags = tags + backend_tags
         native.py_test(
             name = test_name,
             srcs = srcs,
@@ -92,7 +94,8 @@ def tf_xla_py_test(
             main = "{}.py".format(name) if main == None else main,
             data = data + backend_data,
             deps = deps + backend_deps,
-            tags = tags + backend_tags,
+            tags = test_tags,
+            exec_compatible_with = tf_exec_compatible_with({"tags": test_tags}),
             **kwargs
         )
         test_names.append(test_name)
@@ -2,11 +2,19 @@
 # The functions in this file might be referred by tensorflow.bzl. They have to
 # be separate to avoid cyclic references.
 
+load("@local_config_remote_execution//:remote_execution.bzl", "gpu_test_tags")
+
 def tf_cuda_tests_tags():
-    return ["requires-gpu", "local", "gpu"]
+    return ["requires-gpu", "gpu"] + gpu_test_tags()
 
 def tf_sycl_tests_tags():
-    return ["requires-gpu", "local", "gpu"]
+    return ["requires-gpu", "gpu"] + gpu_test_tags()
 
+def tf_exec_compatible_with(kwargs):
+    if ("tags" in kwargs and kwargs["tags"] != None and
+        "remote-gpu" in kwargs["tags"]):
+        return ["@org_tensorflow//third_party/toolchains:gpu_test"]
+    return []
+
 def tf_additional_plugin_deps():
     return select({
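
For illustration (not part of the diff): with remote GPU testing enabled, the two helpers above compose as follows; with the default configuration, gpu_test_tags() returns ["local"] and tf_exec_compatible_with() returns [], so local runs are unchanged.

    # Sketch, assuming gpu_test_tags() returns ["remote-gpu"]:
    tf_cuda_tests_tags()
    # -> ["requires-gpu", "gpu", "remote-gpu"]
    tf_exec_compatible_with({"tags": tf_cuda_tests_tags()})
    # -> ["@org_tensorflow//third_party/toolchains:gpu_test"]
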
@@ -71,6 +71,10 @@ tensorflow/third_party/toolchains/cpus/arm/CROSSTOOL.tpl
 tensorflow/third_party/toolchains/cpus/arm/BUILD
 tensorflow/third_party/toolchains/cpus/py3/BUILD
 tensorflow/third_party/toolchains/cpus/py/BUILD
+tensorflow/third_party/toolchains/remote/configure.bzl
+tensorflow/third_party/toolchains/remote/BUILD.tpl
+tensorflow/third_party/toolchains/remote/BUILD
+tensorflow/third_party/toolchains/remote/execution.bzl.tpl
 tensorflow/third_party/toolchains/BUILD
 tensorflow/third_party/gpus/BUILD
 tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
@@ -9,6 +9,7 @@ load(
     "tf_additional_grpc_deps_py",
     "tf_additional_xla_deps_py",
     "tf_cuda_tests_tags",
+    "tf_exec_compatible_with",
     "tf_sycl_tests_tags",
 )
 load(
@@ -785,6 +786,7 @@ def tf_cc_test(
             ],
         ),
         data = data + tf_binary_dynamic_kernel_dsos(kernels),
+        exec_compatible_with = tf_exec_compatible_with(kwargs),
         # Nested select() statements seem not to be supported when passed to
         # linkstatic, and we already have a cuda select() passed in to this
         # function.
@@ -897,6 +899,7 @@ def tf_cuda_only_cc_test(
         args = [],
         kernels = [],
         linkopts = []):
+    tags = tags + tf_cuda_tests_tags()
     native.cc_test(
         name = "%s%s" % (name, "_gpu"),
         srcs = srcs + tf_binary_additional_srcs(),
@@ -919,7 +922,8 @@ def tf_cuda_only_cc_test(
             clean_dep("//tensorflow:darwin"): 1,
             "//conditions:default": 0,
         }),
-        tags = tags + tf_cuda_tests_tags(),
+        tags = tags,
+        exec_compatible_with = tf_exec_compatible_with({"tags": tags}),
     )
 
 register_extension_info(
@@ -983,6 +987,7 @@ def tf_cc_test_mkl(
         }) + _rpath_linkopts(src_to_test_name(src)),
         deps = deps + tf_binary_dynamic_kernel_deps(kernels) + mkl_deps(),
         data = data + tf_binary_dynamic_kernel_dsos(kernels),
+        exec_compatible_with = tf_exec_compatible_with({"tags": tags}),
         linkstatic = linkstatic,
         tags = tags,
         size = size,
@@ -1752,6 +1757,7 @@ def py_test(deps = [], data = [], kernels = [], **kwargs):
            "//conditions:default": [],
            clean_dep("//tensorflow:no_tensorflow_py_deps"): ["//tensorflow/tools/pip_package:win_pip_package_marker"],
        }) + tf_binary_dynamic_kernel_dsos(kernels),
+       exec_compatible_with = tf_exec_compatible_with(kwargs),
        **kwargs
    )
 
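
Both call patterns above route through the same helper: tf_exec_compatible_with(kwargs) is used where "tags" still lives inside the macro's **kwargs (tf_cc_test, py_test), while tf_exec_compatible_with({"tags": tags}) is used where tags is an explicit parameter the macro has already extended (tf_cuda_only_cc_test, tf_cc_test_mkl, tf_xla_py_test). An illustrative comparison, not part of the diff:

    # Equivalent ways of handing the tag list to the helper (sketch):
    tf_exec_compatible_with(kwargs)             # "tags" is still inside **kwargs
    tf_exec_compatible_with({"tags": tags})     # tags pulled out as a parameter
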
@@ -10,6 +10,7 @@ load("//third_party/py:python_configure.bzl", "python_configure")
 
 load("//third_party/sycl:sycl_configure.bzl", "sycl_configure")
 load("//third_party/systemlibs:syslibs_configure.bzl", "syslibs_configure")
+load("//third_party/toolchains/remote:configure.bzl", "remote_execution_configure")
 load("//third_party/toolchains/clang6:repo.bzl", "clang6_configure")
 load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compiler_configure")
 load("//third_party:repo.bzl", "tf_http_archive")
@@ -64,6 +65,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     syslibs_configure(name = "local_config_syslibs")
     python_configure(name = "local_config_python")
     rocm_configure(name = "local_config_rocm")
+    remote_execution_configure(name = "local_config_remote_execution")
 
     initialize_third_party()
 

third_party/toolchains/BUILD
@@ -4,6 +4,17 @@ package(default_visibility = ["//visibility:public"])
 
 load("//third_party/toolchains/preconfig/generate:containers.bzl", "container_digests")
 
+# Constraint used for platforms below so we can force certain rules to be executed
+# on specific platforms.
+constraint_setting(name = "custom_platforms")
+
+# Constraint for platforms that allow GPU testing (i.e. have a GPU available).
+# This is used in exec_compatible_with of rules that need GPU access.
+constraint_value(
+    name = "gpu_test",
+    constraint_setting = ":custom_platforms",
+)
+
 # TODO(b/122347293): This is the RBE config based on the CPU configuration / image provided
 # in the asci-toolchain setup. Delete this once we switched CPU remote builds to the
 # new platform below.
@@ -68,3 +79,27 @@
            value:"docker://gcr.io/tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu14.04@%s"
        }""" % container_digests["cuda10.0-cudnn7-ubuntu14.04"],
 )
+
+# The above platform with GPU support.
+platform(
+    name = "rbe_cuda10.0-cudnn7-ubuntu14.04-gpu",
+    constraint_values = [
+        "@bazel_tools//platforms:x86_64",
+        "@bazel_tools//platforms:linux",
+        ":gpu_test",
+    ],
+    remote_execution_properties = """
+        properties: {
+            name: "container-image"
+            value: "docker://gcr.io/tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu14.04@%s"
+        }
+        properties: {
+            name: "dockerRuntime"
+            value: "nvidia"
+        }
+        properties: {
+            name: "Pool"
+            value: "gpu-pool"
+        }
+""" % container_digests["cuda10.0-cudnn7-ubuntu14.04"],
+)
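
A note on usage (illustrative, not part of this change): the :gpu_test constraint in exec_compatible_with only resolves if a platform providing it is visible to Bazel's execution-platform resolution, for example by registering the GPU platform in the WORKSPACE or passing it via --extra_execution_platforms.

    # WORKSPACE sketch (assumption, not from this commit): make the GPU-enabled
    # platform available as an execution platform.
    register_execution_platforms(
        "@org_tensorflow//third_party/toolchains:rbe_cuda10.0-cudnn7-ubuntu14.04-gpu",
    )
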

third_party/toolchains/remote/BUILD (new file, empty)
third_party/toolchains/remote/BUILD.tpl (new file, empty)
third_party/toolchains/remote/configure.bzl (new file)
@@ -0,0 +1,43 @@
+"""Repository rule for remote GPU autoconfiguration.
+
+This rule creates the starlark file
+//third_party/toolchains/remote:execution.bzl
+providing the function `gpu_test_tags`.
+
+`gpu_test_tags` will return:
+
+  * `local`: if `REMOTE_GPU_TESTING` is false, allowing CPU tests to run
+    remotely and GPU tests to run locally in the same bazel invocation.
+  * `remote-gpu`: if `REMOTE_GPU_TESTING` is true; this allows rules to
+    set an execution requirement that enables a GPU-enabled remote platform.
+"""
+
+_REMOTE_GPU_TESTING = "REMOTE_GPU_TESTING"
+
+def _flag_enabled(repository_ctx, flag_name):
+    if flag_name not in repository_ctx.os.environ:
+        return False
+    return repository_ctx.os.environ[flag_name].strip() == "1"
+
+def _remote_execution_configure(repository_ctx):
+    # If we do not support remote gpu test execution, mark them as local, so we
+    # can combine remote builds with local gpu tests.
+    gpu_test_tags = "\"local\""
+    if _flag_enabled(repository_ctx, _REMOTE_GPU_TESTING):
+        gpu_test_tags = "\"remote-gpu\""
+    repository_ctx.template(
+        "remote_execution.bzl",
+        Label("//third_party/toolchains/remote:execution.bzl.tpl"),
+        {
+            "%{gpu_test_tags}": gpu_test_tags,
+        },
+    )
+    repository_ctx.template(
+        "BUILD",
+        Label("//third_party/toolchains/remote:BUILD.tpl"),
+    )
+
+remote_execution_configure = repository_rule(
+    implementation = _remote_execution_configure,
+    environ = [_REMOTE_GPU_TESTING],
+)

third_party/toolchains/remote/execution.bzl.tpl (new file)
@@ -0,0 +1,2 @@
+def gpu_test_tags():
+    return [%{gpu_test_tags}]