Initial implementation of gentbl as a Starlark rule

This should avoid having to list all transitive dependencies in every
invocation of gentbl and enable much nicer includes specification. This
is designed to allow a drop-in replacement to ease transition to the
new functionality. All existing usages of gentbl should also work with
this rule. I tested out fully transitioning the current build rules to
use td_library, but am going to submit this as just the change to
the rule, with the BUILD file changes coming later to make things more
incremental.

PiperOrigin-RevId: 360456873
Change-Id: I1b5001e925bf8656ab69235326f85683983ffe5c
This commit is contained in:
Geoffrey Martin-Noble 2021-03-02 10:14:15 -08:00 committed by TensorFlower Gardener
parent e3f3916519
commit 8c1068332b
3 changed files with 364 additions and 77 deletions

View File

@ -626,6 +626,7 @@ tf_native_cc_binary(
srcs = [
"converter_gen.cc",
],
compatible_with = get_compatible_with_cloud(),
deps = [
"@llvm-project//llvm:Support",
"@llvm-project//llvm:TableGen",

View File

@ -163,11 +163,11 @@ gentbl(
tf_ops_category_list = [
{
"name": "ops_a_m",
"include": "tf.[A-M].*$$",
"include": "tf.[A-M].*$",
},
{
"name": "ops_n_z",
"include": "tf.[N-Z].*$$",
"include": "tf.[N-Z].*$",
},
]
@ -177,11 +177,11 @@ tf_ops_category_list = [
compatible_with = get_compatible_with_cloud(),
tbl_outs = [
(
"-gen-op-decls -op-include-regex='" + target["include"] + "'",
"-gen-op-decls -op-include-regex=" + target["include"],
"ir/tf_" + target["name"] + ".h.inc",
),
(
"-gen-op-defs -op-include-regex='" + target["include"] + "'",
"-gen-op-defs -op-include-regex=" + target["include"],
"ir/tf_" + target["name"] + ".cc.inc",
),
],
@ -198,11 +198,11 @@ gentbl(
compatible_with = get_compatible_with_cloud(),
tbl_outs = [
(
"-gen-op-decls -op-exclude-regex='" + "|".join([target["include"] for target in tf_ops_category_list]) + "' ",
"-gen-op-decls -op-exclude-regex=" + "|".join([target["include"] for target in tf_ops_category_list]),
"ir/tf_remaining_ops.h.inc",
),
(
"-gen-op-defs -op-exclude-regex='" + "|".join([target["include"] for target in tf_ops_category_list]) + "' ",
"-gen-op-defs -op-exclude-regex=" + "|".join([target["include"] for target in tf_ops_category_list]),
"ir/tf_remaining_ops.cc.inc",
),
],

View File

@ -1,88 +1,373 @@
"""BUILD extensions for MLIR table generation."""
def gentbl(name, tblgen, td_file, tbl_outs, td_srcs = [], td_includes = [], td_relative_includes = [], strip_include_prefix = None, test = False, **kwargs):
"""gentbl() generates tabular code from a table definition file.
# Provider carrying the tablegen sources and include directories that a target
# and everything it depends on contribute to a tablegen invocation.
TdInfo = provider(
    doc = "Holds tablegen files and the dependencies and include paths" +
          " necessary to build them.",
    fields = {
        "transitive_sources": "td files transitively used by this rule.",
        "transitive_includes": (
            "include arguments to add to the final tablegen invocation." +
            " These are the absolute directory paths that will be added" +
            " with '-I'."
        ),
    },
)
# For now we allow anything that provides DefaultInfo to just forward its files.
# In particular, this allows filegroups to be used. This is mostly to ease
# transition. In the future, the TdInfo provider will be required.
# TODO(gcmn): Switch to enforcing TdInfo provider.
def _get_dep_transitive_srcs(dep):
    """Return the tablegen sources contributed by a single dependency.

    Args:
      dep: a dependency target.

    Returns:
      `TdInfo.transitive_sources` when the target provides TdInfo, otherwise
      the target's `DefaultInfo.files`.
    """
    return dep[TdInfo].transitive_sources if TdInfo in dep else dep[DefaultInfo].files
def _get_dep_transitive_includes(dep):
    """Return the include paths contributed by a single dependency.

    Args:
      dep: a dependency target.

    Returns:
      `TdInfo.transitive_includes` when the target provides TdInfo, otherwise
      an empty depset.
    """
    if TdInfo not in dep:
        # Targets without TdInfo (e.g. filegroups) carry no include paths.
        return depset()
    return dep[TdInfo].transitive_includes
def _get_transitive_srcs(srcs, deps):
    """Collect the sources of a target together with those of its deps.

    Args:
      srcs: a list of source files
      deps: a list of targets that are direct dependencies

    Returns:
      a depset with `srcs` as direct members and each dependency's sources as
      transitive members
    """
    dep_srcs = [_get_dep_transitive_srcs(dep) for dep in deps]
    return depset(direct = srcs, transitive = dep_srcs)
def _get_transitive_includes(includes, deps):
    """Collect the include paths of a target together with those of its deps.

    Args:
      includes: a list of include paths
      deps: a list of targets that are direct dependencies

    Returns:
      a depset with `includes` as direct members and each dependency's include
      paths as transitive members
    """
    dep_includes = [_get_dep_transitive_includes(dep) for dep in deps]
    return depset(direct = includes, transitive = dep_includes)
def _prefix_roots(ctx, includes):
    """Expand each include path so it is searched under every root directory.

    For every entry the result contains, in this order: the path itself, the
    path under the genfiles directory, and the path under the bin directory.

    Args:
      ctx: the rule context, used for `genfiles_dir` and `bin_dir`.
      includes: a list of include paths.

    Returns:
      a list three times as long as `includes`.
    """
    output_roots = [ctx.genfiles_dir.path, ctx.bin_dir.path]
    expanded = []
    for path in includes:
        expanded.append(path)
        expanded.extend([root + "/" + path for root in output_roots])
    return expanded
def _resolve_includes(ctx, includes):
    """Turn user-facing include paths into paths usable by the tool.

    A path that does not start with "/" is taken relative to the package of
    the current label; a path starting with "/" is taken relative to the
    label's workspace root ("." when the label reports no workspace root).
    Each resulting path is then expanded by `_prefix_roots`.

    Args:
      ctx: the rule context.
      includes: a list of include paths as written in the BUILD file.

    Returns:
      a list of resolved, root-prefixed include paths.
    """
    root = ctx.label.workspace_root or "."
    package_prefix = "/" + ctx.label.package + "/"

    resolved = []
    for include in includes:
        rooted = include if include.startswith("/") else package_prefix + include
        resolved.extend(_prefix_roots(ctx, [root + rooted]))
    return resolved
def _td_library_impl(ctx):
    """Implementation of `td_library`.

    Gathers this target's sources and resolved include paths, merges them with
    those of its deps, and exposes the result both as default output files and
    as a TdInfo provider.
    """
    deps = ctx.attr.deps
    own_includes = _resolve_includes(ctx, ctx.attr.includes)

    sources = _get_transitive_srcs(ctx.files.srcs, deps)
    includes = _get_transitive_includes(own_includes, deps)

    td_info = TdInfo(
        transitive_sources = sources,
        transitive_includes = includes,
    )
    return [DefaultInfo(files = sources), td_info]
# Rule that bundles tablegen source files with the include paths needed to
# consume them, so that dependents pick both up through `deps`.
td_library = rule(
    implementation = _td_library_impl,
    attrs = {
        "srcs": attr.label_list(allow_files = True),
        "includes": attr.string_list(
            doc = "Include paths to be added to the final tablegen tool invocation." +
                  " Relative paths are interpreted as relative to the current" +
                  " label's package. Absolute paths are interpreted as relative" +
                  " to the current label's workspace",
        ),
        # TODO(gcmn): limit to TdInfo providers.
        "deps": attr.label_list(
            doc = "Dependencies providing tablegen source files and include paths.",
        ),
    },
)
def _gentbl_rule_impl(ctx):
    """Implementation of `gentbl_rule`: registers one tablegen action.

    The action runs `tblgen` with the user options, the td file, one `-I` per
    transitive include path, and `-o <out>`.
    """
    td_file = ctx.file.td_file
    out = ctx.outputs.out
    deps = ctx.attr.deps

    # Two kinds of includes are combined here. The deprecated `td_includes`
    # are only expanded by `_prefix_roots`: they are already relative to the
    # execution root, i.e. may contain an `external/<workspace_name>` prefix if
    # the current workspace is not the main workspace (where workspace_name is
    # configured per-project and therefore generally not known). The td file's
    # dirname already includes this prefix too. The new-style `includes` get it
    # prepended automatically by `_resolve_includes`, so BUILD files need not
    # depend on project specific configuration or Bazel implementation details.
    new_style_includes = _resolve_includes(ctx, ctx.attr.includes + ["/"])
    legacy_includes = _prefix_roots(ctx, ctx.attr.td_includes + [td_file.dirname])
    trans_includes = _get_transitive_includes(
        new_style_includes + legacy_includes,
        deps,
    )
    trans_srcs = _get_transitive_srcs(ctx.files.td_srcs + [td_file], deps)

    args = ctx.actions.args()
    args.add_all(ctx.attr.opts)
    args.add(td_file)
    args.add_all(trans_includes, before_each = "-I")
    args.add("-o", out.path)

    ctx.actions.run(
        executable = ctx.executable.tblgen,
        arguments = [args],
        inputs = trans_srcs,
        outputs = [out],
    )

    return [DefaultInfo()]
# Build rule that runs `tblgen` once over `td_file` and writes the single
# output file `out`. See `_gentbl_rule_impl` for how the command line is built.
gentbl_rule = rule(
    _gentbl_rule_impl,
    doc = "Generates tabular code from a table definition file.",
    # Match genrule behavior
    output_to_genfiles = True,
    attrs = {
        # The generator binary; it runs as part of the build, hence the exec
        # configuration.
        "tblgen": attr.label(
            doc = "The tablegen executable with which to generate `out`.",
            executable = True,
            cfg = "exec",
        ),
        # The primary input; its directory is always added as an include path
        # by the implementation.
        "td_file": attr.label(
            doc = "The tablegen file to run through `tblgen`.",
            allow_single_file = True,
            mandatory = True,
        ),
        "td_srcs": attr.label_list(
            doc = "Additional tablegen files included by `td_file`. It is not" +
                  " necessary to list td_file here (though not an error).",
            allow_files = True,
        ),
        # TODO(gcmn): limit to TdInfo providers.
        "deps": attr.label_list(
            doc = "Dependencies providing tablegen source files and include" +
                  " paths.",
        ),
        "out": attr.output(
            doc = "The output file for the tablegen invocation.",
            mandatory = True,
        ),
        # Each entry is passed to the tool as one separate argument.
        "opts": attr.string_list(
            doc = "Additional command line options to add to the tablegen" +
                  " invocation. For include arguments, prefer to use" +
                  " `includes`.",
        ),
        # New-style include paths, resolved via `_resolve_includes`.
        "includes": attr.string_list(
            doc = "Include paths to be added to the final tablegen tool" +
                  " invocation. Relative paths are interpreted as relative to" +
                  " the current label's package. Absolute paths are" +
                  " interpreted as relative to the current label's workspace." +
                  " Includes are applied from all roots available in the" +
                  " execution environment (source, genfiles, and bin" +
                  " directories). The execution roots themselves and the " +
                  " directory of td_file are always added.",
        ),
        # Old-style include paths, only expanded via `_prefix_roots`.
        "td_includes": attr.string_list(
            doc = "Include paths to add to the tablegen invocation. Paths are" +
                  " interpreted as relative to the current label's workspace" +
                  " root and applied from all roots available in the" +
                  " execution environment (source, genfiles, and bin" +
                  " directories). Deprecated. Use `includes` instead.",
        ),
    },
)
def _shell_quote(arg):
    """Single-quote `arg` so a POSIX shell passes it through as one literal word."""

    # Inside single quotes nothing is special except the single quote itself,
    # which is emitted by closing the quotes, escaping it, and reopening them.
    return "'" + arg.replace("'", "'\\''") + "'"

# TODO(gcmn): Figure out how to reduce duplication with _gentbl_rule_impl
def _gentbl_test_impl(ctx):
    """Implementation of `gentbl_test`: writes a script that runs tablegen.

    The generated executable invokes `tblgen` with the same options, td file
    and include paths that `gentbl_rule` would use, but discards the output by
    writing to /dev/null.

    Args:
      ctx: the rule context.

    Returns:
      A DefaultInfo whose runfiles contain the tblgen executable and all
      transitive tablegen sources.
    """
    td_file = ctx.file.td_file

    trans_srcs = _get_transitive_srcs(
        ctx.files.td_srcs + [td_file],
        ctx.attr.deps,
    )

    # Note that we have two types of includes here. The deprecated ones expanded
    # only by "_prefix_roots" are already relative to the execution root, i.e.
    # may contain an `external/<workspace_name>` prefix if the current workspace
    # is not the main workspace (where workspace_name is something configured
    # per-project and therefore generally not known). Note that dirname also
    # already includes this prefix. The new style of includes have it prepended
    # automatically by `_resolve_includes` to avoid BUILD files having to depend
    # on project specific configurations and Bazel implementation details.
    trans_includes = _get_transitive_includes(
        _resolve_includes(ctx, ctx.attr.includes + ["/"]) +
        _prefix_roots(ctx, ctx.attr.td_includes + [td_file.dirname]),
        ctx.attr.deps,
    )

    test_args = [ctx.executable.tblgen.short_path]
    test_args.extend(ctx.attr.opts)
    test_args.append(td_file.path)

    # "-I" and its path are separate words so that each can be quoted on its
    # own; the shell hands the tool the same two arguments as before.
    for include in trans_includes.to_list():
        test_args.extend(["-I", include])
    test_args.extend(["-o", "/dev/null"])

    # The script is interpreted by a shell, unlike the ctx.actions.run call in
    # gentbl_rule which passes arguments verbatim. Quote every word so that
    # options containing shell metacharacters (e.g. regexes with `|`, `$`, `*`
    # or `[`) reach the tool unchanged instead of being expanded or split.
    ctx.actions.write(
        ctx.outputs.executable,
        content = " ".join([_shell_quote(arg) for arg in test_args]),
        is_executable = True,
    )

    return [DefaultInfo(
        runfiles = ctx.runfiles(
            [ctx.executable.tblgen],
            transitive_files = trans_srcs,
        ),
    )]
# Test rule counterpart of `gentbl_rule`: instead of producing an output file
# it produces an executable script (see `_gentbl_test_impl`) that runs the
# tablegen tool and discards its output.
gentbl_test = rule(
    _gentbl_test_impl,
    test = True,
    doc = "A shell test that tests the given tablegen invocation. Note" +
          " that unlike gentbl_rule, this builds and invokes `tblgen` in the" +
          " target configuration. Takes all the same arguments as gentbl_rule" +
          " except for `out` (as it does not generate any output)",
    # Match genrule behavior
    output_to_genfiles = True,
    attrs = {
        # Unlike gentbl_rule, the tool is built in the target configuration
        # because it is what the test itself executes.
        "tblgen": attr.label(
            doc = "The tablegen executable run in the shell command. Note" +
                  " that this is built in the target configuration.",
            executable = True,
            cfg = "target",
        ),
        "td_file": attr.label(
            doc = "See gentbl_rule.td_file",
            allow_single_file = True,
            mandatory = True,
        ),
        "td_srcs": attr.label_list(
            doc = "See gentbl_rule.td_srcs",
            allow_files = True,
        ),
        # The remaining attributes mirror gentbl_rule; there is no `out`.
        "deps": attr.label_list(doc = "See gentbl_rule.deps"),
        "opts": attr.string_list(doc = "See gentbl_rule.opts"),
        "includes": attr.string_list(doc = "See gentbl_rule.includes"),
        "td_includes": attr.string_list(doc = "See gentbl_rule.td_includes"),
    },
)
def gentbl(
name,
tblgen,
td_file,
tbl_outs,
td_srcs = [],
td_includes = [],
includes = [],
td_relative_includes = [],
deps = [],
strip_include_prefix = None,
test = False,
**kwargs):
"""Create multiple tablegen generated files using the same tool and input.
All generated outputs are bundled in a cc_library rule.
Args:
name: The name of the generated cc_library rule for use in dependencies.
tblgen: The binary used to produce the output.
td_file: The primary table definitions file.
tbl_outs: A list of tuples (opts, out), where each opts is a string of
options passed to tblgen, and the out is the corresponding output file
produced.
td_srcs: A list of table definition files included transitively.
td_includes: A list of include paths for relative includes, provided as build targets.
td_relative_includes: A list of include paths for relative includes, provided as relative path.
strip_include_prefix: Attribute to pass through to cc_library.
test: Whether to create a test to invoke the tool too.
**kwargs: Extra keyword arguments to pass to native rules such as cc_library below.
td_srcs: See gentbl_rule.td_srcs
includes: See gentbl_rule.includes
td_includes: See gentbl_rule.td_includes
td_relative_includes: An alias for "includes". Deprecated. Use includes
instead.
deps: See gentbl_rule.deps
strip_include_prefix: attribute to pass through to cc_library.
test: whether to create a shell test that invokes the tool too.
**kwargs: Extra keyword arguments to pass to all generated rules.
"""
srcs = []
srcs += td_srcs
if td_file not in td_srcs:
srcs += [td_file]
for (opts_string, out) in tbl_outs:
# TODO(gcmn): The API of opts as single string is preserved for backward
# compatibility. Change to taking a sequence.
opts = opts_string.split(" ") if opts_string else []
td_includes_cmd = [
"-I external/llvm-project/mlir/include -I external/org_tensorflow",
"-I $(GENDIR)/external/llvm-project/mlir/include -I $(GENDIR)/external/org_tensorflow",
]
for td_include in td_includes:
td_includes_cmd += [
"-I%s" % td_include,
"-I$(GENDIR)/%s" % td_include,
]
for td_include in td_relative_includes:
td_includes_cmd += [
"-I%s/%s -Iexternal/org_tensorflow/%s/%s" % (native.package_name(), td_include, native.package_name(), td_include),
"-I$(GENDIR)/%s/%s" % (native.package_name(), td_include),
]
# Filter out empty options
opts = [opt for opt in opts if opt]
local_inc = "-I $$(dirname $(location %s))" % td_file
if test:
# Rule to generate shell script to invoke tblgen. This generates a very
# bare shell file which the sh_test uses.
native.genrule(
name = "%s_genrule_sh" % name,
srcs = srcs,
outs = ["%s.gen.sh" % name],
cmd = ("echo \"\\$$1\" %s \\$${@:2} -o /dev/null > $@" % local_inc),
executable = 1,
first_opt = opts[0] if opts else ""
rule_suffix = "_{}_{}".format(
first_opt.replace("-", "_").replace("=", "_"),
str(hash(opts_string)),
)
gentbl_name = "%s_%s_genrule" % (name, rule_suffix)
gentbl_rule(
name = gentbl_name,
td_file = td_file,
tblgen = tblgen,
opts = opts,
td_srcs = td_srcs,
deps = deps,
includes = includes + td_relative_includes,
# TODO(gcmn): Update callers to td_library and explicit includes and
# drop this hardcoded include.
td_includes = td_includes + [
"external/llvm-project/mlir/include",
],
out = out,
**kwargs
)
for (opts, out) in tbl_outs:
# All arguments to generate the output except output destination.
base_args = [
"$(location %s)" % tblgen,
"%s" % opts,
"$(location %s)" % td_file,
"-I$(GENDIR)",
] + td_includes_cmd
first_opt = opts.split(" ", 1)[0]
rule_suffix = "_{}_{}".format(first_opt.replace("-", "_").replace("=", "_"), str(hash(opts)))
# Rule to generate code using generated shell script.
native.genrule(
name = "%s_%s_genrule" % (name, rule_suffix),
srcs = srcs,
outs = [out],
tools = [tblgen],
message = "Generating code from table: %s" % td_file,
cmd = (" ".join(base_args) + " %s -o $@" % local_inc),
**kwargs
)
# Optionally generate rule to test tblgen invocation.
# Disable these on windows, because $(location ...) does not seem to
# work as expected on windows.
if test:
native.sh_test(
name = "%s_%s_genrule_test" % (name, rule_suffix),
srcs = ["%s.gen.sh" % name],
args = base_args,
data = srcs + [tblgen],
tags = ["no_windows"],
# Also run the generator in the target configuration as a test. This
# means it gets run with asserts and sanitizers and such when they
# are enabled and is counted in coverage.
gentbl_test(
name = "%s_test" % (gentbl_name,),
td_file = td_file,
tblgen = tblgen,
opts = opts,
td_srcs = td_srcs,
deps = deps,
includes = includes + td_relative_includes,
# TODO(gcmn): Update callers to td_library and explicit includes
# and drop this hardcoded include.
td_includes = td_includes + [
"external/llvm-project/mlir/include",
],
**kwargs
)
@ -91,7 +376,8 @@ def gentbl(name, tblgen, td_file, tbl_outs, td_srcs = [], td_includes = [], td_r
hdrs = [f for (opts, f) in tbl_outs if opts not in skip_opts]
native.cc_library(
name = name,
# include_prefix does not apply to textual_hdrs.
# strip_include_prefix does not apply to textual_hdrs.
# https://github.com/bazelbuild/bazel/issues/12424
hdrs = hdrs if strip_include_prefix else [],
strip_include_prefix = strip_include_prefix,
textual_hdrs = hdrs,