Set up remote GPU testing.

Currently, we set the tag "local" for GPU tests in order to be able to execute
remote CPU and local GPU tests within the same bazel invocation.

This change introduces the possibility to set REMOTE_GPU_TESTING to enable
GPU tests to also run remotely; given that tags cannot use starlark's select,
we use an autoconfig rule that defines a function returning the tags we want:
"local" by default and "remote-gpu" if REMOTE_GPU_TESTING is set.

The platform is set via exec_compatible_with constraints, so we select on the
"remote-gpu" tag to add a constraint that is only fulfilled by GPU-enabled
platforms.

PiperOrigin-RevId: 229141861
This commit is contained in:
A. Unique TensorFlower 2019-01-14 01:32:22 -08:00 committed by TensorFlower Gardener
parent cfbf81335a
commit 6dd6ad9fd7
10 changed files with 107 additions and 4 deletions

View File

@ -5,6 +5,7 @@ load("//tensorflow/compiler/tests:plugin.bzl", "plugins")
load(
"//tensorflow/core:platform/default/build_config_root.bzl",
"tf_cuda_tests_tags",
"tf_exec_compatible_with",
)
def all_backends():
@ -84,6 +85,7 @@ def tf_xla_py_test(
else:
fail("Unknown backend {}".format(backend))
test_tags = tags + backend_tags
native.py_test(
name = test_name,
srcs = srcs,
@ -92,7 +94,8 @@ def tf_xla_py_test(
main = "{}.py".format(name) if main == None else main,
data = data + backend_data,
deps = deps + backend_deps,
tags = tags + backend_tags,
tags = test_tags,
exec_compatible_with = tf_exec_compatible_with({"tags": test_tags}),
**kwargs
)
test_names.append(test_name)

View File

@ -2,11 +2,19 @@
# The functions in this file might be referred by tensorflow.bzl. They have to
# be separate to avoid cyclic references.
load("@local_config_remote_execution//:remote_execution.bzl", "gpu_test_tags")
def tf_cuda_tests_tags():
return ["requires-gpu", "local", "gpu"]
return ["requires-gpu", "gpu"] + gpu_test_tags()
def tf_sycl_tests_tags():
return ["requires-gpu", "local", "gpu"]
return ["requires-gpu", "gpu"] + gpu_test_tags()
def tf_exec_compatible_with(kwargs):
    """Returns the exec_compatible_with constraints implied by a rule's tags.

    Args:
      kwargs: dict of rule attributes; only the optional "tags" entry is read.

    Returns:
      A list holding the gpu_test constraint label when "tags" is present,
      not None, and contains "remote-gpu"; otherwise an empty list.
    """
    tags = kwargs.get("tags")

    # A missing "tags" entry and an explicit None are handled the same way.
    if tags == None or "remote-gpu" not in tags:
        return []
    return ["@org_tensorflow//third_party/toolchains:gpu_test"]
def tf_additional_plugin_deps():
return select({

View File

@ -71,6 +71,10 @@ tensorflow/third_party/toolchains/cpus/arm/CROSSTOOL.tpl
tensorflow/third_party/toolchains/cpus/arm/BUILD
tensorflow/third_party/toolchains/cpus/py3/BUILD
tensorflow/third_party/toolchains/cpus/py/BUILD
tensorflow/third_party/toolchains/remote/configure.bzl
tensorflow/third_party/toolchains/remote/BUILD.tpl
tensorflow/third_party/toolchains/remote/BUILD
tensorflow/third_party/toolchains/remote/execution.bzl.tpl
tensorflow/third_party/toolchains/BUILD
tensorflow/third_party/gpus/BUILD
tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl

View File

@ -9,6 +9,7 @@ load(
"tf_additional_grpc_deps_py",
"tf_additional_xla_deps_py",
"tf_cuda_tests_tags",
"tf_exec_compatible_with",
"tf_sycl_tests_tags",
)
load(
@ -785,6 +786,7 @@ def tf_cc_test(
],
),
data = data + tf_binary_dynamic_kernel_dsos(kernels),
exec_compatible_with = tf_exec_compatible_with(kwargs),
# Nested select() statements seem not to be supported when passed to
# linkstatic, and we already have a cuda select() passed in to this
# function.
@ -897,6 +899,7 @@ def tf_cuda_only_cc_test(
args = [],
kernels = [],
linkopts = []):
tags = tags + tf_cuda_tests_tags()
native.cc_test(
name = "%s%s" % (name, "_gpu"),
srcs = srcs + tf_binary_additional_srcs(),
@ -919,7 +922,8 @@ def tf_cuda_only_cc_test(
clean_dep("//tensorflow:darwin"): 1,
"//conditions:default": 0,
}),
tags = tags + tf_cuda_tests_tags(),
tags = tags,
exec_compatible_with = tf_exec_compatible_with({"tags": tags}),
)
register_extension_info(
@ -983,6 +987,7 @@ def tf_cc_test_mkl(
}) + _rpath_linkopts(src_to_test_name(src)),
deps = deps + tf_binary_dynamic_kernel_deps(kernels) + mkl_deps(),
data = data + tf_binary_dynamic_kernel_dsos(kernels),
exec_compatible_with = tf_exec_compatible_with({"tags": tags}),
linkstatic = linkstatic,
tags = tags,
size = size,
@ -1752,6 +1757,7 @@ def py_test(deps = [], data = [], kernels = [], **kwargs):
"//conditions:default": [],
clean_dep("//tensorflow:no_tensorflow_py_deps"): ["//tensorflow/tools/pip_package:win_pip_package_marker"],
}) + tf_binary_dynamic_kernel_dsos(kernels),
exec_compatible_with = tf_exec_compatible_with(kwargs),
**kwargs
)

View File

@ -10,6 +10,7 @@ load("//third_party/py:python_configure.bzl", "python_configure")
load("//third_party/sycl:sycl_configure.bzl", "sycl_configure")
load("//third_party/systemlibs:syslibs_configure.bzl", "syslibs_configure")
load("//third_party/toolchains/remote:configure.bzl", "remote_execution_configure")
load("//third_party/toolchains/clang6:repo.bzl", "clang6_configure")
load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compiler_configure")
load("//third_party:repo.bzl", "tf_http_archive")
@ -64,6 +65,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
syslibs_configure(name = "local_config_syslibs")
python_configure(name = "local_config_python")
rocm_configure(name = "local_config_rocm")
remote_execution_configure(name = "local_config_remote_execution")
initialize_third_party()

View File

@ -4,6 +4,17 @@ package(default_visibility = ["//visibility:public"])
load("//third_party/toolchains/preconfig/generate:containers.bzl", "container_digests")
# Constraint used for platforms below so we can force certain rules to be executed
# on specific platforms.
constraint_setting(name = "custom_platforms")
# Constraint for platforms that allow GPU testing (i.e. have a GPU available).
# This is used in exec_compatible_with of rules that need GPU access.
constraint_value(
name = "gpu_test",
constraint_setting = ":custom_platforms",
)
# TODO(b/122347293): This is the RBE config based on the CPU configuration / image provided
# in the asci-toolchain setup. Delete this once we switched CPU remote builds to the
# new platform below.
@ -68,3 +79,27 @@ platform(
value:"docker://gcr.io/tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu14.04@%s"
}""" % container_digests["cuda10.0-cudnn7-ubuntu14.04"],
)
# The above platform with GPU support.
platform(
name = "rbe_cuda10.0-cudnn7-ubuntu14.04-gpu",
constraint_values = [
"@bazel_tools//platforms:x86_64",
"@bazel_tools//platforms:linux",
# GPU marker constraint defined in this package; per its definition it is
# meant to be named in exec_compatible_with by rules that need GPU access.
":gpu_test",
],
# The image digest is interpolated into the "container-image" value by the
# `%` expression that follows the string literal.
remote_execution_properties = """
properties: {
name: "container-image"
value: "docker://gcr.io/tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu14.04@%s"
}
properties: {
name: "dockerRuntime"
value: "nvidia"
}
properties: {
name: "Pool"
value: "gpu-pool"
}
""" % container_digests["cuda10.0-cudnn7-ubuntu14.04"],
)

0
third_party/toolchains/remote/BUILD vendored Normal file
View File

View File

View File

@ -0,0 +1,43 @@
"""Repository rule for remote GPU autoconfiguration.
This rule instantiates the template
//third_party/toolchains/remote:execution.bzl.tpl
as the starlark file `remote_execution.bzl` at the root of the generated
repository, providing the function `gpu_test_tags`.
`gpu_test_tags` will return:
* `local`: if `REMOTE_GPU_TESTING` is unset or set to anything other than
`1`, allowing CPU tests to run remotely and GPU tests to run locally in the
same bazel invocation.
* `remote-gpu`: if `REMOTE_GPU_TESTING` is set to `1`; this allows rules to
set an execution requirement that enables a GPU-enabled remote platform.
"""
# Name of the environment variable that switches the generated GPU test tag
# from "local" to "remote-gpu".
_REMOTE_GPU_TESTING = "REMOTE_GPU_TESTING"
def _flag_enabled(repository_ctx, flag_name):
    """Reports whether an environment flag is switched on.

    Args:
      repository_ctx: the repository rule context; its `os.environ` is read.
      flag_name: name of the environment variable to inspect.

    Returns:
      True only if the variable is present and its value, with surrounding
      whitespace stripped, is exactly "1"; False otherwise.
    """
    env = repository_ctx.os.environ
    return flag_name in env and env[flag_name].strip() == "1"
def _remote_execution_configure(repository_ctx):
    """Generates remote_execution.bzl and BUILD in the configured repository.

    The tag substituted into the template is "remote-gpu" when the
    REMOTE_GPU_TESTING flag is enabled and "local" otherwise.

    Args:
      repository_ctx: the repository rule context.
    """
    if _flag_enabled(repository_ctx, _REMOTE_GPU_TESTING):
        gpu_test_tags = "\"remote-gpu\""
    else:
        # Without remote GPU test execution, GPU tests are marked local so
        # that remote builds can be combined with local GPU tests.
        gpu_test_tags = "\"local\""

    substitutions = {"%{gpu_test_tags}": gpu_test_tags}
    repository_ctx.template(
        "remote_execution.bzl",
        Label("//third_party/toolchains/remote:execution.bzl.tpl"),
        substitutions,
    )

    # The BUILD template is instantiated without substitutions.
    repository_ctx.template(
        "BUILD",
        Label("//third_party/toolchains/remote:BUILD.tpl"),
    )
# Repository rule implemented by _remote_execution_configure. The
# REMOTE_GPU_TESTING variable is listed in `environ` because the
# implementation reads it from the environment.
remote_execution_configure = repository_rule(
implementation = _remote_execution_configure,
environ = [_REMOTE_GPU_TESTING],
)

View File

@ -0,0 +1,2 @@
# Template source: `%{gpu_test_tags}` is a placeholder that the repository
# rule in configure.bzl replaces with a quoted tag ("local" or "remote-gpu"),
# so the generated function returns a one-element list of tags.
def gpu_test_tags():
return [%{gpu_test_tags}]