Add Starlark rules to generate cubin headers.
Also add a cuda_gpu_architectures macro for getting a list of CUDA GPU architectures. PiperOrigin-RevId: 311494598 Change-Id: Ie573c2d22a42ab9e0002bdcfbee5be534b87cd2c
This commit is contained in:
parent
e10d6dd07b
commit
23d478c422
|
@ -40,6 +40,7 @@ cc_library(
|
||||||
tf_cc_binary(
|
tf_cc_binary(
|
||||||
name = "tf_to_cubin",
|
name = "tf_to_cubin",
|
||||||
srcs = ["tf_to_cubin.cc"],
|
srcs = ["tf_to_cubin.cc"],
|
||||||
|
visibility = ["//tensorflow/core/kernels/cubin_headers:__pkg__"],
|
||||||
deps = [
|
deps = [
|
||||||
":cubin_creator",
|
":cubin_creator",
|
||||||
"//tensorflow/core:framework_internal",
|
"//tensorflow/core:framework_internal",
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
"""Generates cubin headers for TF dialect ops."""
|
||||||
|
|
||||||
|
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures", "if_cuda")
|
||||||
|
|
||||||
|
def _lookup_file(filegroup, path):
    """Returns the first file in `filegroup` whose path ends with `path`.

    Args:
      filegroup: a target providing a `files` depset (e.g. a filegroup attr).
      path: relative path suffix to match, e.g. "bin/fatbinary".

    Returns:
      The matching File, or None when no file in the group matches.
    """
    matches = [f for f in filegroup.files.to_list() if f.path.endswith(path)]
    return matches[0] if matches else None
def _gen_kernel_image_hdr_impl(ctx):
    """Rule implementation: compiles a TF op to cubins, fatbins them, emits a C header.

    Pipeline per declared output:
      1. Run the tf_to_cubin tool once per GPU architecture in `gpu_archs`,
         producing one <name>.<arch>.cubin file each.
      2. Merge all cubins into a single <name>.fatbin via the CUDA `fatbinary`
         tool taken from the configured CUDA root filegroup.
      3. Embed the fatbin as a const int array in ctx.outputs.out using `bin2c`.
    """
    if not ctx.attr.gpu_archs:
        fail("No GPU architecture specified, use --config=cuda or similar")

    name = ctx.attr.name
    # tile_size is written like "16x16"; the tool wants a comma-separated list.
    tile_sizes = ctx.attr.tile_size.replace("x", ",")
    same_shape = []
    if ctx.attr.same_shape:
        same_shape.append("--same_shape=%s" % ctx.attr.same_shape)

    cubins = []
    images = []
    for arch in ctx.attr.gpu_archs:
        filename = "%s.%s.cubin" % (name, arch)
        cubin = ctx.actions.declare_file(filename)
        ctx.actions.run(
            outputs = [cubin],
            executable = ctx.executable._tool,
            arguments = same_shape + [
                "--tile_sizes=%s" % tile_sizes,
                # gpu_archs entries look like "sm_70"; the tool wants "70".
                "--arch=%s" % arch.split("_")[1],
                "--output=%s" % cubin.path,
                ctx.attr.op,
            ],
            mnemonic = "compile",
        )
        cubins.append(cubin)
        # --image flags tell fatbinary which profile each cubin belongs to.
        images.append("--image=profile=%s,file=%s" % (arch, cubin.path))

    # Generate fatbin file from all cubins.
    fatbin = ctx.actions.declare_file("%s.fatbin" % name)
    ctx.actions.run(
        outputs = [fatbin],
        inputs = cubins,
        executable = _lookup_file(ctx.attr._cuda_root, "bin/fatbinary"),
        arguments = [
            "--64",
            "--cmdline=--compile-only",
            "--link",
            "--compress-all",
            "--create=%s" % fatbin.path,
        ] + images,
        mnemonic = "fatbinary",
    )

    # bin2c writes the C array to stdout, so redirect into the output header.
    # NOTE(review): _lookup_file may return None if the CUDA root filegroup
    # lacks bin/bin2c — that would fail here with an unhelpful error; verify.
    bin2c = _lookup_file(ctx.attr._cuda_root, "bin/bin2c")
    ctx.actions.run_shell(
        outputs = [ctx.outputs.out],
        inputs = [fatbin],
        tools = [bin2c],
        command = "%s --static --const --type=int --name=%s %s 1> %s" %
                  (bin2c.path, ctx.attr.symbol, fatbin.path, ctx.outputs.out.path),
        mnemonic = "bin2c",
    )
# Private rule wrapped by the gen_kernel_image_hdr macro below.
_gen_kernel_image_hdr = rule(
    implementation = _gen_kernel_image_hdr_impl,
    output_to_genfiles = True,
    attrs = {
        # Textual TF-dialect op description passed through to tf_to_cubin.
        "op": attr.string(mandatory = True),
        # Tile sizes in "AxB" form, e.g. "16x16".
        "tile_size": attr.string(mandatory = True),
        # Optional --same_shape constraint forwarded to the tool.
        "same_shape": attr.string(),
        # Generated C header containing the embedded fatbin.
        "out": attr.output(mandatory = True),
        # C symbol name for the embedded array (bin2c --name).
        "symbol": attr.string(mandatory = True),
        # Architectures to compile for, e.g. ["sm_70", "sm_75"].
        "gpu_archs": attr.string_list(mandatory = True),
        # Filegroup supplying the CUDA fatbinary/bin2c binaries.
        "_cuda_root": attr.label(
            default = Label("@local_config_cuda//cuda:cuda_root"),
        ),
        "_tool": attr.label(
            executable = True,
            default = Label("//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_cubin"),
            # Built for the host since it runs during the build.
            cfg = "host",
        ),
    },
)
def gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
    """Generates a C header with fatbin data from a Tensorflow op.

    Args:
      name: target name; also used to derive the header name ("<name>.h")
        and the embedded symbol ("kSnakeName" -> "kCamelName").
      op: textual TF-dialect op description for tf_to_cubin.
      tile_size: tile sizes in "AxB" form.
      tags: standard Bazel tags forwarded to the underlying rule.
      same_shape: optional same-shape constraint forwarded to the tool.
    """
    # BUG FIX: the original wrapped the rule call in
    # if_cuda(if_true = [_gen_kernel_image_hdr(...)]). Macro arguments are
    # evaluated eagerly at loading time, so the rule was instantiated
    # unconditionally and the select() returned by if_cuda was discarded —
    # if_cuda can only make attribute VALUES configurable, not target
    # definitions. Guard on the configured architectures instead: when CUDA
    # is not configured, cuda_gpu_architectures() is [] and no target is
    # created.
    if cuda_gpu_architectures():
        _gen_kernel_image_hdr(
            name = name,
            op = op,
            tile_size = tile_size,
            same_shape = same_shape,
            out = "%s.h" % name,
            # "tanh_f32" -> "kTanhF32".
            symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
            gpu_archs = cuda_gpu_architectures(),
            tags = tags,
        )
|
@ -166,6 +166,14 @@ cc_library(
|
||||||
data = [":cuda-nvvm"],
|
data = [":cuda-nvvm"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# CUDA toolchain binaries needed at build time by rules that post-process
# cubins (looked up by relative path suffix, e.g. "bin/fatbinary").
filegroup(
    name = "cuda_root",
    srcs = [
        "cuda/bin/fatbinary",
        "cuda/bin/bin2c",
    ],
)
||||||
|
|
||||||
bzl_library(
|
bzl_library(
|
||||||
name = "build_defs_bzl",
|
name = "build_defs_bzl",
|
||||||
srcs = ["build_defs.bzl"],
|
srcs = ["build_defs.bzl"],
|
||||||
|
|
|
@ -51,6 +51,10 @@ def cuda_is_configured():
|
||||||
"""Returns true if CUDA was enabled during the configure process."""
|
"""Returns true if CUDA was enabled during the configure process."""
|
||||||
return %{cuda_is_configured}
|
return %{cuda_is_configured}
|
||||||
|
|
||||||
|
def cuda_gpu_architectures():
    """Returns a list of supported GPU architectures."""
    # Template placeholder: cuda_configure substitutes a list of "sm_XY"
    # strings computed from the configured compute capabilities, or "[]"
    # when CUDA is not configured (dummy repository).
    return %{cuda_gpu_architectures}
||||||
|
|
||||||
def if_cuda_is_configured(x):
|
def if_cuda_is_configured(x):
|
||||||
"""Tests if the CUDA was enabled during the configure process.
|
"""Tests if the CUDA was enabled during the configure process.
|
||||||
|
|
||||||
|
|
|
@ -714,6 +714,7 @@ def _create_dummy_repository(repository_ctx):
|
||||||
{
|
{
|
||||||
"%{cuda_is_configured}": "False",
|
"%{cuda_is_configured}": "False",
|
||||||
"%{cuda_extra_copts}": "[]",
|
"%{cuda_extra_copts}": "[]",
|
||||||
|
"%{cuda_gpu_architectures}": "[]",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
_tpl(
|
_tpl(
|
||||||
|
@ -842,6 +843,16 @@ def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
|
||||||
]
|
]
|
||||||
return str(capability_flags)
|
return str(capability_flags)
|
||||||
|
|
||||||
|
def _compute_cuda_gpu_architectures(repository_ctx, compute_capabilities):
    """Returns the string form of the list of sm_* architecture names.

    Args:
      repository_ctx: repository context (unused; kept for signature parity
        with _compute_cuda_extra_copts).
      compute_capabilities: capability strings such as "7.0", possibly with
        duplicates.

    Returns:
      String rendering of the deduplicated list, e.g. str of
      ["sm_70", "sm_75"], suitable for template substitution.
    """
    # The original deduplicated via dict(zip(xs, xs)).keys(), which allocates
    # a throwaway dict and leans on Starlark's .keys() returning a list.
    # An explicit membership check is clearer and preserves first-seen order
    # the same way.
    gpu_architectures = []
    for capability in compute_capabilities:
        arch = "sm_" + capability.replace(".", "")
        if arch not in gpu_architectures:
            gpu_architectures.append(arch)
    return str(gpu_architectures)
def _tpl_path(repository_ctx, filename):
    """Returns the repo path of the checked-in template for `filename` (.tpl)."""
    return repository_ctx.path(Label("//third_party/gpus/%s.tpl" % filename))
|
||||||
|
|
||||||
|
@ -973,6 +984,10 @@ def _create_local_cuda_repository(repository_ctx):
|
||||||
repository_ctx,
|
repository_ctx,
|
||||||
cuda_config.compute_capabilities,
|
cuda_config.compute_capabilities,
|
||||||
),
|
),
|
||||||
|
"%{cuda_gpu_architectures}": _compute_cuda_gpu_architectures(
|
||||||
|
repository_ctx,
|
||||||
|
cuda_config.compute_capabilities,
|
||||||
|
),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue