From 56ef8c1ac289a38fa00cd6da756e46f968d63ebb Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Mon, 8 Jun 2020 00:57:53 -0700
Subject: [PATCH] Use an input file for tf_to_cubin.

Also use a template file for the MLIR definitions of the TensorFlow ops. Add
another build macro which converts the template to an MLIR file by replacing
the type placeholders.

PiperOrigin-RevId: 315227102
Change-Id: I75cd2109c4b087122ae7ecb6b1d0d5fe35813a25
---
 .../mlir/tools/kernel_gen/tf_to_cubin.cc      | 12 +++-
 tensorflow/core/kernels/cubin_headers/BUILD   | 28 --------
 .../kernels/cubin_headers/bias_add.mlir.tmpl  |  6 ++
 .../core/kernels/cubin_headers/build_defs.bzl | 67 ++++++++++++++++---
 .../core/kernels/cubin_headers/relu.mlir.tmpl |  5 ++
 .../core/kernels/cubin_headers/tanh.mlir.tmpl |  5 ++
 6 files changed, 83 insertions(+), 40 deletions(-)
 create mode 100644 tensorflow/core/kernels/cubin_headers/bias_add.mlir.tmpl
 create mode 100644 tensorflow/core/kernels/cubin_headers/relu.mlir.tmpl
 create mode 100644 tensorflow/core/kernels/cubin_headers/tanh.mlir.tmpl

diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
index 8edc567e777..66fcabde0ac 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_cubin.cc
@@ -47,6 +47,7 @@ bool ParseStringList(std::string string_list, std::vector<uint32_t>* result) {
 }  // namespace
 
 int main(int argc, char** argv) {
+  std::string input_file = "foo.mlir";
   std::string output_file = "foo.bin";
   int32_t architecture = 50;
   std::vector<uint32_t> tile_sizes;
@@ -75,6 +76,7 @@ int main(int argc, char** argv) {
   };
 
   std::vector<tensorflow::Flag> flag_list = {
+      tensorflow::Flag("input", &input_file, "input file"),
       tensorflow::Flag("output", &output_file, "output file"),
       tensorflow::Flag("arch", &architecture,
                        "target architecture (e.g. 50 for sm_50)"),
@@ -94,8 +96,16 @@ int main(int argc, char** argv) {
   std::pair<int32_t, int32_t> compute_capability(architecture / 10,
                                                  architecture % 10);
 
+  std::string tf_code;
+  auto read_status = tensorflow::ReadFileToString(tensorflow::Env::Default(),
+                                                  input_file, &tf_code);
+  if (!read_status.ok()) {
+    LOG(ERROR) << read_status;
+    return 1;
+  }
+
   auto cubin = tensorflow::kernel_gen::GenerateCubinForTfCode(
-      argv[1], compute_capability, tile_sizes, same_shape, unroll_factors);
+      tf_code, compute_capability, tile_sizes, same_shape, unroll_factors);
 
   if (!cubin.ok()) {
     LOG(ERROR) << cubin.status();
diff --git a/tensorflow/core/kernels/cubin_headers/BUILD b/tensorflow/core/kernels/cubin_headers/BUILD
index 1d9b98543f1..a7f810eeded 100644
--- a/tensorflow/core/kernels/cubin_headers/BUILD
+++ b/tensorflow/core/kernels/cubin_headers/BUILD
@@ -6,18 +6,8 @@ package(
     licenses = ["notice"],  # Apache 2.0
 )
 
-bias_add_kernel = """
-func @bias_add(%arg0: tensor<?x?xf99>,
-         %arg1: tensor<?xf99>) -> tensor<?x?xf99> {
-  %0 = "tf.BiasAdd"(%arg0, %arg1) { T = "tfdtype$DT_TYPE" }
-    : (tensor<?x?xf99>, tensor<?xf99>) -> tensor<?x?xf99>
-  return %0 : tensor<?x?xf99>
-}
-"""
-
 gen_kernel_library(
     name = "bias_add",
-    op = bias_add_kernel,
     same_shape = "0,2",
     tile_size = "16x16",
     types = [
@@ -27,17 +17,8 @@ gen_kernel_library(
     ],
 )
 
-relu_kernel = """
-func @relu(%arg0: tensor<?xf99>) -> tensor<?xf99> {
-  %0 = "tf.Relu"(%arg0) { T = "tfdtype$DT_TYPE" }
-    : (tensor<?xf99>) -> tensor<?xf99>
-  return %0 : tensor<?xf99>
-}
-"""
-
 gen_kernel_library(
     name = "relu",
-    op = relu_kernel,
     same_shape = "0,1",
     tile_size = "256",
     types = [
@@ -47,17 +28,8 @@ gen_kernel_library(
     ],
 )
 
-tanh_kernel = """
-func @tanh(%arg0: tensor<?xf99>) -> tensor<?xf99> {
-  %0 = "tf.Tanh"(%arg0) { T = "tfdtype$DT_TYPE" }
-    : (tensor<?xf99>) -> tensor<?xf99>
-  return %0 : tensor<?xf99>
-}
-"""
-
 gen_kernel_library(
     name = "tanh",
-    op = tanh_kernel,
     tile_size = "256",
     types = [
         "f32",
diff --git a/tensorflow/core/kernels/cubin_headers/bias_add.mlir.tmpl b/tensorflow/core/kernels/cubin_headers/bias_add.mlir.tmpl
new file mode 100644
index 00000000000..edbee639143
--- /dev/null
+++ b/tensorflow/core/kernels/cubin_headers/bias_add.mlir.tmpl
@@ -0,0 +1,6 @@
+func @bias_add(%arg0: tensor<?x?xf99>,  // Template: element type is filled in by build_defs.bzl.
+         %arg1: tensor<?xf99>) -> tensor<?x?xf99> {
+  %0 = "tf.BiasAdd"(%arg0, %arg1) { T = "tfdtype$DT_TYPE" }
+    : (tensor<?x?xf99>, tensor<?xf99>) -> tensor<?x?xf99>
+  return %0 : tensor<?x?xf99>
+}
diff --git a/tensorflow/core/kernels/cubin_headers/build_defs.bzl b/tensorflow/core/kernels/cubin_headers/build_defs.bzl
index bd19d7edec3..ffeb06e6b36 100644
--- a/tensorflow/core/kernels/cubin_headers/build_defs.bzl
+++ b/tensorflow/core/kernels/cubin_headers/build_defs.bzl
@@ -27,13 +27,14 @@ def _gen_kernel_image_hdr_impl(ctx):
         filename = "%s.%s.cubin" % (name, arch)
         cubin = ctx.actions.declare_file(filename)
         ctx.actions.run(
+            inputs = [ctx.file.mlir_op],
             outputs = [cubin],
             executable = ctx.executable._tool,
             arguments = same_shape + [
                 "--tile_sizes=%s" % tile_sizes,
                 "--arch=%s" % arch.split("_")[1],
+                "--input=%s" % ctx.file.mlir_op.path,
                 "--output=%s" % cubin.path,
-                ctx.attr.op,
             ],
             mnemonic = "compile",
         )
@@ -70,7 +71,7 @@ _gen_kernel_image_hdr_rule = rule(
     implementation = _gen_kernel_image_hdr_impl,
     output_to_genfiles = True,
     attrs = {
-        "op": attr.string(mandatory = True),
+        "mlir_op": attr.label(mandatory = True, allow_single_file = True),
         "tile_size": attr.string(mandatory = True),
         "same_shape": attr.string(),
         "out": attr.output(mandatory = True),
@@ -87,12 +88,12 @@ _gen_kernel_image_hdr_rule = rule(
     },
 )
 
-def _gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
+def _gen_kernel_image_hdr(name, mlir_op, tile_size, tags = [], same_shape = None):
     """Generates a C header with fatbin data from a Tensorflow op."""
     if cuda_gpu_architectures():
         _gen_kernel_image_hdr_rule(
             name = name,
-            op = op,
+            mlir_op = mlir_op,
             tile_size = tile_size,
             same_shape = same_shape,
             out = "%s.h" % name,
@@ -101,17 +102,61 @@ def _gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
             tags = tags,
         )
 
-def gen_kernel_library(name, op, types, tile_size, tags = [], same_shape = None):
+def _gen_mlir_op_impl(ctx):
+    """Writes ctx.outputs.out: the template with its type placeholders replaced."""
+    type_to_dtype = {"f16": "DT_HALF", "f32": "DT_FLOAT", "f64": "DT_DOUBLE"}
+
+    # The checked-in templates spell the dtype placeholder as either DT_TYPE or
+    # DT_DTYPE, so substitute both; otherwise one spelling is left unreplaced.
+    ctx.actions.run_shell(
+        inputs = [ctx.file.template],
+        outputs = [ctx.outputs.out],
+        command = "cat {0} | sed s/f99/{1}/g | sed -e s/DT_DTYPE/{2}/g -e s/DT_TYPE/{2}/g > {3}".format(
+            ctx.file.template.path,
+            ctx.attr.type,
+            type_to_dtype[ctx.attr.type],
+            ctx.outputs.out.path,
+        ),
+    )
+
+_gen_mlir_op_rule = rule(  # Expands one MLIR template for one element type.
+    implementation = _gen_mlir_op_impl,
+    output_to_genfiles = True,
+    attrs = {
+        "template": attr.label(mandatory = True, allow_single_file = True),  # Template file fed to sed.
+        "type": attr.string(mandatory = True),  # Must be a key of type_to_dtype: "f16", "f32" or "f64".
+        "out": attr.output(mandatory = True),  # File that receives the expanded template.
+    },
+)
+
+def _gen_mlir_op(name, type):
+    _gen_mlir_op_rule(  # Expands <name>.mlir.tmpl into <name>_<type>.mlir.
+        name = "generate_%s_%s_mlir" % (name, type),
+        template = "%s.mlir.tmpl" % name,
+        type = type,
+        out = "%s_%s.mlir" % (name, type),
+    )
+
+def gen_kernel_library(name, types, tile_size, tags = [], same_shape = None):
+    """ Generate a library with kernels for a specific tensorflow op.
+
+    Args:
+      name: The name of the tensorflow op.
+      types: The types ("f16", "f32", "f64") for which a kernel should be generated.
+      tile_size: The tiling specification, e.g. "16x16".
+      tags: The tags which should be added to the library.
+      same_shape: The information about which shapes are the same, e.g. "0,1".
+    """
+
     if cuda_gpu_architectures():
-        type_to_dtype = {
-            "f16": "DT_HALF",
-            "f32": "DT_FLOAT",
-            "f64": "DT_DOUBLE",
-        }
         for type in types:
+            _gen_mlir_op(
+                name = name,
+                type = type,
+            )
             _gen_kernel_image_hdr(
                 name = "{name}_{type}_kernel".format(name = name, type = type),
-                op = op.replace("f99", type).replace("DT_TYPE", type_to_dtype[type]),
+                mlir_op = "{name}_{type}.mlir".format(name = name, type = type),
                 tile_size = tile_size,
                 tags = tags,
                 same_shape = same_shape,
diff --git a/tensorflow/core/kernels/cubin_headers/relu.mlir.tmpl b/tensorflow/core/kernels/cubin_headers/relu.mlir.tmpl
new file mode 100644
index 00000000000..1b761c3736e
--- /dev/null
+++ b/tensorflow/core/kernels/cubin_headers/relu.mlir.tmpl
@@ -0,0 +1,5 @@
+func @relu(%arg0: tensor<?xf99>) -> tensor<?xf99> {  // Template: element type is filled in by build_defs.bzl.
+  %0 = "tf.Relu"(%arg0) { T = "tfdtype$DT_TYPE" }
+    : (tensor<?xf99>) -> tensor<?xf99>
+  return %0 : tensor<?xf99>
+}
diff --git a/tensorflow/core/kernels/cubin_headers/tanh.mlir.tmpl b/tensorflow/core/kernels/cubin_headers/tanh.mlir.tmpl
new file mode 100644
index 00000000000..bc044153c61
--- /dev/null
+++ b/tensorflow/core/kernels/cubin_headers/tanh.mlir.tmpl
@@ -0,0 +1,5 @@
+func @tanh(%arg0: tensor<?xf99>) -> tensor<?xf99> {  // Template: element type and dtype are filled in by build_defs.bzl.
+  %0 = "tf.Tanh"(%arg0) { T = "tfdtype$DT_DTYPE" }
+    : (tensor<?xf99>) -> tensor<?xf99>
+  return %0 : tensor<?xf99>
+}