Add Starlark rules to generate cubin headers.
Also add a cuda_gpu_architectures macro for getting a list of CUDA GPU architectures. PiperOrigin-RevId: 311494598 Change-Id: Ie573c2d22a42ab9e0002bdcfbee5be534b87cd2c
This commit is contained in:
parent
e10d6dd07b
commit
23d478c422
|
@ -40,6 +40,7 @@ cc_library(
|
||||||
tf_cc_binary(
|
tf_cc_binary(
|
||||||
name = "tf_to_cubin",
|
name = "tf_to_cubin",
|
||||||
srcs = ["tf_to_cubin.cc"],
|
srcs = ["tf_to_cubin.cc"],
|
||||||
|
visibility = ["//tensorflow/core/kernels/cubin_headers:__pkg__"],
|
||||||
deps = [
|
deps = [
|
||||||
":cubin_creator",
|
":cubin_creator",
|
||||||
"//tensorflow/core:framework_internal",
|
"//tensorflow/core:framework_internal",
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
"""Generates cubin headers for TF dialect ops."""
|
||||||
|
|
||||||
|
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures", "if_cuda")
|
||||||
|
|
||||||
|
def _lookup_file(filegroup, path):
    """Returns the first file in `filegroup` whose path ends with `path`.

    Args:
      filegroup: a target providing a `files` depset (e.g. a filegroup attr).
      path: relative path suffix to match, e.g. "bin/fatbinary".

    Returns:
      The matching File, or None when no file in the group matches.
    """
    matches = [f for f in filegroup.files.to_list() if f.path.endswith(path)]
    return matches[0] if matches else None
def _gen_kernel_image_hdr_impl(ctx):
    """Rule implementation: compiles a TF op to cubins, fatbins them, emits a C header.

    Pipeline per declared output:
      1. Run the tf_to_cubin tool once per GPU architecture in `gpu_archs`,
         producing one <name>.<arch>.cubin file each.
      2. Merge all cubins into a single <name>.fatbin via the CUDA `fatbinary`
         tool taken from the configured CUDA root filegroup.
      3. Embed the fatbin as a const int array in ctx.outputs.out using `bin2c`.
    """
    if not ctx.attr.gpu_archs:
        fail("No GPU architecture specified, use --config=cuda or similar")

    name = ctx.attr.name
    # tile_size is written like "16x16"; the tool wants a comma-separated list.
    tile_sizes = ctx.attr.tile_size.replace("x", ",")
    same_shape = []
    if ctx.attr.same_shape:
        same_shape.append("--same_shape=%s" % ctx.attr.same_shape)

    cubins = []
    images = []
    for arch in ctx.attr.gpu_archs:
        filename = "%s.%s.cubin" % (name, arch)
        cubin = ctx.actions.declare_file(filename)
        ctx.actions.run(
            outputs = [cubin],
            executable = ctx.executable._tool,
            arguments = same_shape + [
                "--tile_sizes=%s" % tile_sizes,
                # gpu_archs entries look like "sm_70"; the tool wants "70".
                "--arch=%s" % arch.split("_")[1],
                "--output=%s" % cubin.path,
                ctx.attr.op,
            ],
            mnemonic = "compile",
        )
        cubins.append(cubin)
        # --image flags tell fatbinary which profile each cubin belongs to.
        images.append("--image=profile=%s,file=%s" % (arch, cubin.path))

    # Generate fatbin file from all cubins.
    fatbin = ctx.actions.declare_file("%s.fatbin" % name)
    ctx.actions.run(
        outputs = [fatbin],
        inputs = cubins,
        executable = _lookup_file(ctx.attr._cuda_root, "bin/fatbinary"),
        arguments = [
            "--64",
            "--cmdline=--compile-only",
            "--link",
            "--compress-all",
            "--create=%s" % fatbin.path,
        ] + images,
        mnemonic = "fatbinary",
    )

    # bin2c writes the C array to stdout, so redirect into the output header.
    # NOTE(review): _lookup_file may return None if the CUDA root filegroup
    # lacks bin/bin2c — that would fail here with an unhelpful error; verify.
    bin2c = _lookup_file(ctx.attr._cuda_root, "bin/bin2c")
    ctx.actions.run_shell(
        outputs = [ctx.outputs.out],
        inputs = [fatbin],
        tools = [bin2c],
        command = "%s --static --const --type=int --name=%s %s 1> %s" %
                  (bin2c.path, ctx.attr.symbol, fatbin.path, ctx.outputs.out.path),
        mnemonic = "bin2c",
    )
# Private rule wrapped by the gen_kernel_image_hdr macro below.
_gen_kernel_image_hdr = rule(
    implementation = _gen_kernel_image_hdr_impl,
    output_to_genfiles = True,
    attrs = {
        # Textual TF-dialect op description passed through to tf_to_cubin.
        "op": attr.string(mandatory = True),
        # Tile sizes in "AxB" form, e.g. "16x16".
        "tile_size": attr.string(mandatory = True),
        # Optional --same_shape constraint forwarded to the tool.
        "same_shape": attr.string(),
        # Generated C header containing the embedded fatbin.
        "out": attr.output(mandatory = True),
        # C symbol name for the embedded array (bin2c --name).
        "symbol": attr.string(mandatory = True),
        # Architectures to compile for, e.g. ["sm_70", "sm_75"].
        "gpu_archs": attr.string_list(mandatory = True),
        # Filegroup supplying the CUDA fatbinary/bin2c binaries.
        "_cuda_root": attr.label(
            default = Label("@local_config_cuda//cuda:cuda_root"),
        ),
        "_tool": attr.label(
            executable = True,
            default = Label("//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_cubin"),
            # Built for the host since it runs during the build.
            cfg = "host",
        ),
    },
)
def gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
    """Generates a C header with fatbin data from a Tensorflow op.

    Args:
      name: target name; also used to derive the header name ("<name>.h")
        and the embedded symbol ("kSnakeName" -> "kCamelName").
      op: textual TF-dialect op description for tf_to_cubin.
      tile_size: tile sizes in "AxB" form.
      tags: standard Bazel tags forwarded to the underlying rule.
      same_shape: optional same-shape constraint forwarded to the tool.
    """
    # BUG FIX: the original wrapped the rule call in
    # if_cuda(if_true = [_gen_kernel_image_hdr(...)]). Macro arguments are
    # evaluated eagerly at loading time, so the rule was instantiated
    # unconditionally and the select() returned by if_cuda was discarded —
    # if_cuda can only make attribute VALUES configurable, not target
    # definitions. Guard on the configured architectures instead: when CUDA
    # is not configured, cuda_gpu_architectures() is [] and no target is
    # created.
    if cuda_gpu_architectures():
        _gen_kernel_image_hdr(
            name = name,
            op = op,
            tile_size = tile_size,
            same_shape = same_shape,
            out = "%s.h" % name,
            # "tanh_f32" -> "kTanhF32".
            symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
            gpu_archs = cuda_gpu_architectures(),
            tags = tags,
        )
|
@ -166,6 +166,14 @@ cc_library(
|
||||||
data = [":cuda-nvvm"],
|
data = [":cuda-nvvm"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# CUDA toolchain binaries needed at build time by rules that post-process
# cubins (looked up by relative path suffix, e.g. "bin/fatbinary").
filegroup(
    name = "cuda_root",
    srcs = [
        "cuda/bin/fatbinary",
        "cuda/bin/bin2c",
    ],
)
||||||
|
|
||||||
bzl_library(
|
bzl_library(
|
||||||
name = "build_defs_bzl",
|
name = "build_defs_bzl",
|
||||||
srcs = ["build_defs.bzl"],
|
srcs = ["build_defs.bzl"],
|
||||||
|
|
|
@ -51,6 +51,10 @@ def cuda_is_configured():
|
||||||
"""Returns true if CUDA was enabled during the configure process."""
|
"""Returns true if CUDA was enabled during the configure process."""
|
||||||
return %{cuda_is_configured}
|
return %{cuda_is_configured}
|
||||||
|
|
||||||
|
def cuda_gpu_architectures():
    """Returns a list of supported GPU architectures."""
    # Template placeholder: cuda_configure substitutes a list of "sm_XY"
    # strings computed from the configured compute capabilities, or "[]"
    # when CUDA is not configured (dummy repository).
    return %{cuda_gpu_architectures}
||||||
|
|
||||||
def if_cuda_is_configured(x):
|
def if_cuda_is_configured(x):
|
||||||
"""Tests if the CUDA was enabled during the configure process.
|
"""Tests if the CUDA was enabled during the configure process.
|
||||||
|
|
||||||
|
|
|
@ -714,6 +714,7 @@ def _create_dummy_repository(repository_ctx):
|
||||||
{
|
{
|
||||||
"%{cuda_is_configured}": "False",
|
"%{cuda_is_configured}": "False",
|
||||||
"%{cuda_extra_copts}": "[]",
|
"%{cuda_extra_copts}": "[]",
|
||||||
|
"%{cuda_gpu_architectures}": "[]",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
_tpl(
|
_tpl(
|
||||||
|
@ -842,6 +843,16 @@ def _compute_cuda_extra_copts(repository_ctx, compute_capabilities):
|
||||||
]
|
]
|
||||||
return str(capability_flags)
|
return str(capability_flags)
|
||||||
|
|
||||||
|
def _compute_cuda_gpu_architectures(repository_ctx, compute_capabilities):
    """Returns the string form of the list of sm_* architecture names.

    Args:
      repository_ctx: repository context (unused; kept for signature parity
        with _compute_cuda_extra_copts).
      compute_capabilities: capability strings such as "7.0", possibly with
        duplicates.

    Returns:
      String rendering of the deduplicated list, e.g. str of
      ["sm_70", "sm_75"], suitable for template substitution.
    """
    # The original deduplicated via dict(zip(xs, xs)).keys(), which allocates
    # a throwaway dict and leans on Starlark's .keys() returning a list.
    # An explicit membership check is clearer and preserves first-seen order
    # the same way.
    gpu_architectures = []
    for capability in compute_capabilities:
        arch = "sm_" + capability.replace(".", "")
        if arch not in gpu_architectures:
            gpu_architectures.append(arch)
    return str(gpu_architectures)
def _tpl_path(repository_ctx, filename):
    """Returns the repo path of the checked-in template for `filename` (.tpl)."""
    return repository_ctx.path(Label("//third_party/gpus/%s.tpl" % filename))
|
||||||
|
|
||||||
|
@ -973,6 +984,10 @@ def _create_local_cuda_repository(repository_ctx):
|
||||||
repository_ctx,
|
repository_ctx,
|
||||||
cuda_config.compute_capabilities,
|
cuda_config.compute_capabilities,
|
||||||
),
|
),
|
||||||
|
"%{cuda_gpu_architectures}": _compute_cuda_gpu_architectures(
|
||||||
|
repository_ctx,
|
||||||
|
cuda_config.compute_capabilities,
|
||||||
|
),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue