Use an input file for tf_to_cubin.
Also use a template file for the MLIR definitions of the TensorFlow ops. Add another build macro that converts the template to an MLIR file by replacing the type placeholders. PiperOrigin-RevId: 315227102 Change-Id: I75cd2109c4b087122ae7ecb6b1d0d5fe35813a25
This commit is contained in:
parent
c7f8f9ecfb
commit
56ef8c1ac2
|
@ -47,6 +47,7 @@ bool ParseStringList(std::string string_list, std::vector<uint32_t>* result) {
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
std::string input_file = "foo.mlir";
|
||||||
std::string output_file = "foo.bin";
|
std::string output_file = "foo.bin";
|
||||||
int32_t architecture = 50;
|
int32_t architecture = 50;
|
||||||
std::vector<uint32_t> tile_sizes;
|
std::vector<uint32_t> tile_sizes;
|
||||||
|
@ -75,6 +76,7 @@ int main(int argc, char** argv) {
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<tensorflow::Flag> flag_list = {
|
std::vector<tensorflow::Flag> flag_list = {
|
||||||
|
tensorflow::Flag("input", &input_file, "input file"),
|
||||||
tensorflow::Flag("output", &output_file, "output file"),
|
tensorflow::Flag("output", &output_file, "output file"),
|
||||||
tensorflow::Flag("arch", &architecture,
|
tensorflow::Flag("arch", &architecture,
|
||||||
"target architecture (e.g. 50 for sm_50)"),
|
"target architecture (e.g. 50 for sm_50)"),
|
||||||
|
@ -94,8 +96,16 @@ int main(int argc, char** argv) {
|
||||||
std::pair<int32_t, int32_t> compute_capability(architecture / 10,
|
std::pair<int32_t, int32_t> compute_capability(architecture / 10,
|
||||||
architecture % 10);
|
architecture % 10);
|
||||||
|
|
||||||
|
std::string tf_code;
|
||||||
|
auto read_status = tensorflow::ReadFileToString(tensorflow::Env::Default(),
|
||||||
|
input_file, &tf_code);
|
||||||
|
if (!read_status.ok()) {
|
||||||
|
LOG(ERROR) << read_status;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
auto cubin = tensorflow::kernel_gen::GenerateCubinForTfCode(
|
auto cubin = tensorflow::kernel_gen::GenerateCubinForTfCode(
|
||||||
argv[1], compute_capability, tile_sizes, same_shape, unroll_factors);
|
tf_code, compute_capability, tile_sizes, same_shape, unroll_factors);
|
||||||
|
|
||||||
if (!cubin.ok()) {
|
if (!cubin.ok()) {
|
||||||
LOG(ERROR) << cubin.status();
|
LOG(ERROR) << cubin.status();
|
||||||
|
|
|
@ -6,18 +6,8 @@ package(
|
||||||
licenses = ["notice"], # Apache 2.0
|
licenses = ["notice"], # Apache 2.0
|
||||||
)
|
)
|
||||||
|
|
||||||
bias_add_kernel = """
|
|
||||||
func @bias_add(%arg0: tensor<?x?xf99>,
|
|
||||||
%arg1: tensor<?xf99>) -> tensor<?x?xf99> {
|
|
||||||
%0 = "tf.BiasAdd"(%arg0, %arg1) { T = "tfdtype$DT_TYPE" }
|
|
||||||
: (tensor<?x?xf99>, tensor<?xf99>) -> tensor<?x?xf99>
|
|
||||||
return %0 : tensor<?x?xf99>
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
gen_kernel_library(
|
gen_kernel_library(
|
||||||
name = "bias_add",
|
name = "bias_add",
|
||||||
op = bias_add_kernel,
|
|
||||||
same_shape = "0,2",
|
same_shape = "0,2",
|
||||||
tile_size = "16x16",
|
tile_size = "16x16",
|
||||||
types = [
|
types = [
|
||||||
|
@ -27,17 +17,8 @@ gen_kernel_library(
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
relu_kernel = """
|
|
||||||
func @relu(%arg0: tensor<?xf99>) -> tensor<?xf99> {
|
|
||||||
%0 = "tf.Relu"(%arg0) { T = "tfdtype$DT_TYPE" }
|
|
||||||
: (tensor<?xf99>) -> tensor<?xf99>
|
|
||||||
return %0 : tensor<?xf99>
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
gen_kernel_library(
|
gen_kernel_library(
|
||||||
name = "relu",
|
name = "relu",
|
||||||
op = relu_kernel,
|
|
||||||
same_shape = "0,1",
|
same_shape = "0,1",
|
||||||
tile_size = "256",
|
tile_size = "256",
|
||||||
types = [
|
types = [
|
||||||
|
@ -47,17 +28,8 @@ gen_kernel_library(
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
tanh_kernel = """
|
|
||||||
func @tanh(%arg0: tensor<?xf99>) -> tensor<?xf99> {
|
|
||||||
%0 = "tf.Tanh"(%arg0) { T = "tfdtype$DT_TYPE" }
|
|
||||||
: (tensor<?xf99>) -> tensor<?xf99>
|
|
||||||
return %0 : tensor<?xf99>
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
gen_kernel_library(
|
gen_kernel_library(
|
||||||
name = "tanh",
|
name = "tanh",
|
||||||
op = tanh_kernel,
|
|
||||||
tile_size = "256",
|
tile_size = "256",
|
||||||
types = [
|
types = [
|
||||||
"f32",
|
"f32",
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
// BiasAdd kernel template. The element-type placeholder and the dtype
// placeholder below are textual tokens that get substituted per kernel type
// when the concrete .mlir file is generated from this template.
// The dtype placeholder must be spelled exactly like the token the generator's
// sed expression replaces; otherwise the attribute is left unsubstituted.
func @bias_add(%arg0: tensor<?x?xf99>,
    %arg1: tensor<?xf99>) -> tensor<?x?xf99> {
  %0 = "tf.BiasAdd"(%arg0, %arg1) { T = "tfdtype$DT_DTYPE" }
    : (tensor<?x?xf99>, tensor<?xf99>) -> tensor<?x?xf99>
  return %0 : tensor<?x?xf99>
}
|
|
@ -27,13 +27,14 @@ def _gen_kernel_image_hdr_impl(ctx):
|
||||||
filename = "%s.%s.cubin" % (name, arch)
|
filename = "%s.%s.cubin" % (name, arch)
|
||||||
cubin = ctx.actions.declare_file(filename)
|
cubin = ctx.actions.declare_file(filename)
|
||||||
ctx.actions.run(
|
ctx.actions.run(
|
||||||
|
inputs = [ctx.file.mlir_op],
|
||||||
outputs = [cubin],
|
outputs = [cubin],
|
||||||
executable = ctx.executable._tool,
|
executable = ctx.executable._tool,
|
||||||
arguments = same_shape + [
|
arguments = same_shape + [
|
||||||
"--tile_sizes=%s" % tile_sizes,
|
"--tile_sizes=%s" % tile_sizes,
|
||||||
"--arch=%s" % arch.split("_")[1],
|
"--arch=%s" % arch.split("_")[1],
|
||||||
|
"--input=%s" % ctx.file.mlir_op.path,
|
||||||
"--output=%s" % cubin.path,
|
"--output=%s" % cubin.path,
|
||||||
ctx.attr.op,
|
|
||||||
],
|
],
|
||||||
mnemonic = "compile",
|
mnemonic = "compile",
|
||||||
)
|
)
|
||||||
|
@ -70,7 +71,7 @@ _gen_kernel_image_hdr_rule = rule(
|
||||||
implementation = _gen_kernel_image_hdr_impl,
|
implementation = _gen_kernel_image_hdr_impl,
|
||||||
output_to_genfiles = True,
|
output_to_genfiles = True,
|
||||||
attrs = {
|
attrs = {
|
||||||
"op": attr.string(mandatory = True),
|
"mlir_op": attr.label(mandatory = True, allow_single_file = True),
|
||||||
"tile_size": attr.string(mandatory = True),
|
"tile_size": attr.string(mandatory = True),
|
||||||
"same_shape": attr.string(),
|
"same_shape": attr.string(),
|
||||||
"out": attr.output(mandatory = True),
|
"out": attr.output(mandatory = True),
|
||||||
|
@ -87,12 +88,12 @@ _gen_kernel_image_hdr_rule = rule(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
def _gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
|
def _gen_kernel_image_hdr(name, mlir_op, tile_size, tags = [], same_shape = None):
|
||||||
"""Generates a C header with fatbin data from a Tensorflow op."""
|
"""Generates a C header with fatbin data from a Tensorflow op."""
|
||||||
if cuda_gpu_architectures():
|
if cuda_gpu_architectures():
|
||||||
_gen_kernel_image_hdr_rule(
|
_gen_kernel_image_hdr_rule(
|
||||||
name = name,
|
name = name,
|
||||||
op = op,
|
mlir_op = mlir_op,
|
||||||
tile_size = tile_size,
|
tile_size = tile_size,
|
||||||
same_shape = same_shape,
|
same_shape = same_shape,
|
||||||
out = "%s.h" % name,
|
out = "%s.h" % name,
|
||||||
|
@ -101,17 +102,61 @@ def _gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
|
||||||
tags = tags,
|
tags = tags,
|
||||||
)
|
)
|
||||||
|
|
||||||
def gen_kernel_library(name, op, types, tile_size, tags = [], same_shape = None):
|
def _gen_mlir_op_impl(ctx):
|
||||||
if cuda_gpu_architectures():
|
|
||||||
type_to_dtype = {
|
type_to_dtype = {
|
||||||
"f16": "DT_HALF",
|
"f16": "DT_HALF",
|
||||||
"f32": "DT_FLOAT",
|
"f32": "DT_FLOAT",
|
||||||
"f64": "DT_DOUBLE",
|
"f64": "DT_DOUBLE",
|
||||||
}
|
}
|
||||||
|
ctx.actions.run_shell(
|
||||||
|
inputs = [ctx.file.template],
|
||||||
|
outputs = [ctx.outputs.out],
|
||||||
|
command = "cat %s | sed s/f99/%s/g | sed s/DT_DTYPE/%s/g > %s" % (
|
||||||
|
ctx.file.template.path,
|
||||||
|
ctx.attr.type,
|
||||||
|
type_to_dtype[ctx.attr.type],
|
||||||
|
ctx.outputs.out.path,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Rule wrapping _gen_mlir_op_impl: takes a single-file `template` label and a
# `type` string, and declares the generated file through the `out` attribute.
_gen_mlir_op_rule = rule(
    attrs = {
        "out": attr.output(mandatory = True),
        "template": attr.label(allow_single_file = True, mandatory = True),
        "type": attr.string(mandatory = True),
    },
    implementation = _gen_mlir_op_impl,
    output_to_genfiles = True,
)
|
||||||
|
|
||||||
|
def _gen_mlir_op(name, type):
    """Instantiates the MLIR template of an op for one element type.

    Declares a `generate_<name>_<type>_mlir` target that reads
    `<name>.mlir.tmpl` and writes `<name>_<type>.mlir`.

    Args:
      name: Base name of the op; selects the template file.
      type: Element type string passed through to the rule.
    """
    target_name = "generate_%s_%s_mlir" % (name, type)
    template_file = "%s.mlir.tmpl" % (name,)
    generated_file = "%s_%s.mlir" % (name, type)
    _gen_mlir_op_rule(
        name = target_name,
        template = template_file,
        type = type,
        out = generated_file,
    )
|
||||||
|
|
||||||
|
def gen_kernel_library(name, types, tile_size, tags = [], same_shape = None):
|
||||||
|
""" Generate a library with kernels for a specific tensorflow op.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
name: The name of the tensorflow op.
|
||||||
|
types: The types ("f16", "f32", "f64") for which a kernel should be generated.
|
||||||
|
tile_size: The tiling specification, e.g. "16x16".
|
||||||
|
tags: The tags which should be added to the library.
|
||||||
|
same_shape: The information about which shapes are the same, e.g. "0,1".
|
||||||
|
"""
|
||||||
|
|
||||||
|
if cuda_gpu_architectures():
|
||||||
for type in types:
|
for type in types:
|
||||||
|
_gen_mlir_op(
|
||||||
|
name = name,
|
||||||
|
type = type,
|
||||||
|
)
|
||||||
_gen_kernel_image_hdr(
|
_gen_kernel_image_hdr(
|
||||||
name = "{name}_{type}_kernel".format(name = name, type = type),
|
name = "{name}_{type}_kernel".format(name = name, type = type),
|
||||||
op = op.replace("f99", type).replace("DT_TYPE", type_to_dtype[type]),
|
mlir_op = "{name}_{type}.mlir".format(name = name, type = type),
|
||||||
tile_size = tile_size,
|
tile_size = tile_size,
|
||||||
tags = tags,
|
tags = tags,
|
||||||
same_shape = same_shape,
|
same_shape = same_shape,
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
// Relu kernel template. The element-type placeholder and the dtype
// placeholder below are textual tokens that get substituted per kernel type
// when the concrete .mlir file is generated from this template.
// The dtype placeholder must be spelled exactly like the token the generator's
// sed expression replaces; otherwise the attribute is left unsubstituted.
func @relu(%arg0: tensor<?xf99>) -> tensor<?xf99> {
  %0 = "tf.Relu"(%arg0) { T = "tfdtype$DT_DTYPE" }
    : (tensor<?xf99>) -> tensor<?xf99>
  return %0 : tensor<?xf99>
}
|
|
@ -0,0 +1,5 @@
|
||||||
|
// Tanh kernel template. The element-type placeholder and the dtype
// placeholder are textual tokens; presumably they are substituted per kernel
// type by the build rule that turns this template into a concrete .mlir file.
func @tanh(%input: tensor<?xf99>) -> tensor<?xf99> {
  %result = "tf.Tanh"(%input) { T = "tfdtype$DT_DTYPE" } : (tensor<?xf99>) -> tensor<?xf99>
  return %result : tensor<?xf99>
}
|
Loading…
Reference in New Issue