Use an input file for tf_to_cubin.

Also use a template file for the MLIR definitions of the TensorFlow ops. Add another build macro which converts the template to an MLIR file by replacing the type placeholders. PiperOrigin-RevId: 315227102 Change-Id: I75cd2109c4b087122ae7ecb6b1d0d5fe35813a25
2020-06-08 00:57:53 -07:00 · 2020-06-08 00:57:53 -07:00 · 56ef8c1ac2
parent c7f8f9ecfb
commit 56ef8c1ac2
6 changed files with 83 additions and 40 deletions
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
@ -47,6 +47,7 @@ bool ParseStringList(std::string string_list, std::vector<uint32_t>* result) {
 }  // namespace
 int main(int argc, char** argv) {
  std::string input_file = "foo.mlir";
  std::string output_file = "foo.bin";
  int32_t architecture = 50;
  std::vector<uint32_t> tile_sizes;
@ -75,6 +76,7 @@ int main(int argc, char** argv) {
  };
  std::vector<tensorflow::Flag> flag_list = {
      tensorflow::Flag("input", &input_file, "input file"),
      tensorflow::Flag("output", &output_file, "output file"),
      tensorflow::Flag("arch", &architecture,
                       "target architecture (e.g. 50 for sm_50)"),
@ -94,8 +96,16 @@ int main(int argc, char** argv) {
  std::pair<int32_t, int32_t> compute_capability(architecture / 10,
                                                 architecture % 10);
  std::string tf_code;
  auto read_status = tensorflow::ReadFileToString(tensorflow::Env::Default(),
                                                  input_file, &tf_code);
  if (!read_status.ok()) {
    LOG(ERROR) << read_status;
    return 1;
  }
  auto cubin = tensorflow::kernel_gen::GenerateCubinForTfCode(
-      argv[1], compute_capability, tile_sizes, same_shape, unroll_factors);
+      tf_code, compute_capability, tile_sizes, same_shape, unroll_factors);
  if (!cubin.ok()) {
    LOG(ERROR) << cubin.status();
--- a/tensorflow/core/kernels/cubin_headers/BUILD
+++ b/tensorflow/core/kernels/cubin_headers/BUILD
@ -6,18 +6,8 @@ package(
    licenses = ["notice"],  # Apache 2.0
 )
 bias_add_kernel = """
 func @bias_add(%arg0: tensor<?x?xf99>,
         %arg1: tensor<?xf99>) -> tensor<?x?xf99> {
  %0 = "tf.BiasAdd"(%arg0, %arg1) { T = "tfdtype$DT_TYPE" }
    : (tensor<?x?xf99>, tensor<?xf99>) -> tensor<?x?xf99>
  return %0 : tensor<?x?xf99>
 }
 """
 gen_kernel_library(
    name = "bias_add",
    op = bias_add_kernel,
    same_shape = "0,2",
    tile_size = "16x16",
    types = [
@ -27,17 +17,8 @@ gen_kernel_library(
    ],
 )
 relu_kernel = """
 func @relu(%arg0: tensor<?xf99>) -> tensor<?xf99> {
  %0 = "tf.Relu"(%arg0) { T = "tfdtype$DT_TYPE" }
    : (tensor<?xf99>) -> tensor<?xf99>
  return %0 : tensor<?xf99>
 }
 """
 gen_kernel_library(
    name = "relu",
    op = relu_kernel,
    same_shape = "0,1",
    tile_size = "256",
    types = [
@ -47,17 +28,8 @@ gen_kernel_library(
    ],
 )
 tanh_kernel = """
 func @tanh(%arg0: tensor<?xf99>) -> tensor<?xf99> {
  %0 = "tf.Tanh"(%arg0) { T = "tfdtype$DT_TYPE" }
    : (tensor<?xf99>) -> tensor<?xf99>
  return %0 : tensor<?xf99>
 }
 """
 gen_kernel_library(
    name = "tanh",
    op = tanh_kernel,
    tile_size = "256",
    types = [
        "f32",
--- a/tensorflow/core/kernels/cubin_headers/bias_add.mlir.tmpl
+++ b/tensorflow/core/kernels/cubin_headers/bias_add.mlir.tmpl
@ -0,0 +1,6 @@
 // Template for the BiasAdd kernel: adds a 1-D bias tensor to a 2-D tensor.
 // The element-type and dtype placeholders are substituted per type when the
 // concrete .mlir file is generated from this template.
 func @bias_add(%arg0: tensor<?x?xf99>,
         %arg1: tensor<?xf99>) -> tensor<?x?xf99> {
  %0 = "tf.BiasAdd"(%arg0, %arg1) { T = "tfdtype$DT_DTYPE" }
    : (tensor<?x?xf99>, tensor<?xf99>) -> tensor<?x?xf99>
  return %0 : tensor<?x?xf99>
 }
--- a/tensorflow/core/kernels/cubin_headers/build_defs.bzl
+++ b/tensorflow/core/kernels/cubin_headers/build_defs.bzl
@ -27,13 +27,14 @@ def _gen_kernel_image_hdr_impl(ctx):
        filename = "%s.%s.cubin" % (name, arch)
        cubin = ctx.actions.declare_file(filename)
        ctx.actions.run(
            inputs = [ctx.file.mlir_op],
            outputs = [cubin],
            executable = ctx.executable._tool,
            arguments = same_shape + [
                "--tile_sizes=%s" % tile_sizes,
                "--arch=%s" % arch.split("_")[1],
                "--input=%s" % ctx.file.mlir_op.path,
                "--output=%s" % cubin.path,
                ctx.attr.op,
            ],
            mnemonic = "compile",
        )
@ -70,7 +71,7 @@ _gen_kernel_image_hdr_rule = rule(
    implementation = _gen_kernel_image_hdr_impl,
    output_to_genfiles = True,
    attrs = {
-        "op": attr.string(mandatory = True),
+        "mlir_op": attr.label(mandatory = True, allow_single_file = True),
        "tile_size": attr.string(mandatory = True),
        "same_shape": attr.string(),
        "out": attr.output(mandatory = True),
@ -87,12 +88,12 @@ _gen_kernel_image_hdr_rule = rule(
    },
 )
-def _gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
+def _gen_kernel_image_hdr(name, mlir_op, tile_size, tags = [], same_shape = None):
    """Generates a C header with fatbin data from a Tensorflow op."""
    if cuda_gpu_architectures():
        _gen_kernel_image_hdr_rule(
            name = name,
-            op = op,
+            mlir_op = mlir_op,
            tile_size = tile_size,
            same_shape = same_shape,
            out = "%s.h" % name,
@ -101,17 +102,61 @@ def _gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
            tags = tags,
        )
-def gen_kernel_library(name, op, types, tile_size, tags = [], same_shape = None):
+def _gen_mlir_op_impl(ctx):
    """Instantiates an MLIR op template for a single element type.

    Copies ctx.file.template to ctx.outputs.out while replacing the
    element-type placeholder "f99" with ctx.attr.type and the dtype
    placeholder with the matching "DT_*" name from type_to_dtype. Both
    spellings of the dtype placeholder that appear in the templates
    ("DT_DTYPE" and "DT_TYPE") are substituted.

    Args:
      ctx: The rule context; reads the `template`, `type` and `out` attributes.
    """
    type_to_dtype = {
        "f16": "DT_HALF",
        "f32": "DT_FLOAT",
        "f64": "DT_DOUBLE",
    }
    if ctx.attr.type not in type_to_dtype:
        fail("Unsupported type '%s'; expected one of %s" % (
            ctx.attr.type,
            ", ".join(type_to_dtype.keys()),
        ))
    dtype = type_to_dtype[ctx.attr.type]
    ctx.actions.run_shell(
        inputs = [ctx.file.template],
        outputs = [ctx.outputs.out],
        # DT_DTYPE is handled before DT_TYPE. None of the substituted dtype
        # names contains "DT_TYPE", so the second pass cannot corrupt the
        # output of the first.
        command = "cat %s | sed s/f99/%s/g | sed s/DT_DTYPE/%s/g | sed s/DT_TYPE/%s/g > %s" % (
            ctx.file.template.path,
            ctx.attr.type,
            dtype,
            dtype,
            ctx.outputs.out.path,
        ),
    )
 # Rule wrapping _gen_mlir_op_impl: turns one template file into one generated
 # file for the element type given in `type`.
 _gen_mlir_op_rule = rule(
    attrs = {
        # Generated file.
        "out": attr.output(mandatory = True),
        # Template file containing the placeholders.
        "template": attr.label(allow_single_file = True, mandatory = True),
        # Element type to substitute, e.g. "f32".
        "type": attr.string(mandatory = True),
    },
    implementation = _gen_mlir_op_impl,
    output_to_genfiles = True,
 )
 def _gen_mlir_op(name, type):
    """Declares the target generating `<name>_<type>.mlir` from `<name>.mlir.tmpl`.

    Args:
      name: Base name of the op; selects the template file.
      type: Element type to instantiate the template for.
    """
    target_name = "generate_{name}_{type}_mlir".format(name = name, type = type)
    template_file = "{name}.mlir.tmpl".format(name = name)
    generated_file = "{name}_{type}.mlir".format(name = name, type = type)
    _gen_mlir_op_rule(
        name = target_name,
        template = template_file,
        type = type,
        out = generated_file,
    )
 def gen_kernel_library(name, types, tile_size, tags = [], same_shape = None):
    """ Generate a library with kernels for a specific tensorflow op.
    Args:
      name: The name of the tensorflow op.
      types: The types ("f16", "f32", "f64") for which a kernel should be generated.
      tile_size: The tiling specification, e.g. "16x16".
      tags: The tags which should be added to the library.
      same_shape: The information about which shapes are the same, e.g. "0,1".
    """
    if cuda_gpu_architectures():
        for type in types:
            _gen_mlir_op(
                name = name,
                type = type,
            )
            _gen_kernel_image_hdr(
                name = "{name}_{type}_kernel".format(name = name, type = type),
-                op = op.replace("f99", type).replace("DT_TYPE", type_to_dtype[type]),
+                mlir_op = "{name}_{type}.mlir".format(name = name, type = type),
                tile_size = tile_size,
                tags = tags,
                same_shape = same_shape,
--- a/tensorflow/core/kernels/cubin_headers/relu.mlir.tmpl
+++ b/tensorflow/core/kernels/cubin_headers/relu.mlir.tmpl
@ -0,0 +1,5 @@
 // Template for the Relu kernel on a 1-D tensor.
 // The element-type and dtype placeholders are substituted per type when the
 // concrete .mlir file is generated from this template.
 func @relu(%arg0: tensor<?xf99>) -> tensor<?xf99> {
  %0 = "tf.Relu"(%arg0) { T = "tfdtype$DT_DTYPE" }
    : (tensor<?xf99>) -> tensor<?xf99>
  return %0 : tensor<?xf99>
 }
--- a/tensorflow/core/kernels/cubin_headers/tanh.mlir.tmpl
+++ b/tensorflow/core/kernels/cubin_headers/tanh.mlir.tmpl
@ -0,0 +1,5 @@
 // Template for the Tanh kernel on a 1-D tensor.
 // The element-type and dtype placeholders are substituted per type when the
 // concrete .mlir file is generated from this template.
 func @tanh(%arg0: tensor<?xf99>) -> tensor<?xf99> {
  %0 = "tf.Tanh"(%arg0) { T = "tfdtype$DT_DTYPE" }
    : (tensor<?xf99>) -> tensor<?xf99>
  return %0 : tensor<?xf99>
 }