Generate cubin headers for bias_add and relu.

Also, instead of checking if_cuda, check whether cuda_gpu_architectures() is non-empty.

PiperOrigin-RevId: 311521784
Change-Id: I6a1a7e9cefc8e845e69d62fb3c19d9976b0f2196
2020-05-14 06:41:25 -07:00 · 2020-05-14 06:41:25 -07:00 · 015197cf8b
parent a04c8be3e7
commit 015197cf8b
2 changed files with 51 additions and 5 deletions
--- a/tensorflow/core/kernels/cubin_headers/BUILD
+++ b/tensorflow/core/kernels/cubin_headers/BUILD
@@ -0,0 +1,47 @@
+# Generates headers containing cubin for CUDA kernels.
+load("//tensorflow/core/kernels/cubin_headers:build_defs.bzl", "gen_kernel_image_hdr")
+
+bias_add_kernel = """
+func @bias_add(%arg0: tensor<?x?xf99>,
+         %arg1: tensor<?xf99>) -> tensor<?x?xf99> {
+  %0 = "tf.BiasAdd"(%arg0, %arg1) { T = "tfdtype$DT_TYPE" }
+    : (tensor<?x?xf99>, tensor<?xf99>) -> tensor<?x?xf99>
+  return %0 : tensor<?x?xf99>
+}
+"""
+
+[
+    gen_kernel_image_hdr(
+        name = "bias_add_{type}_kernel".format(type = type),
+        op = bias_add_kernel.replace("f99", type).replace("DT_TYPE", dtype),
+        same_shape = "0,2",
+        tile_size = "16x16",
+    )
+    for (type, dtype) in [
+        ("f16", "DT_HALF"),
+        ("f32", "DT_FLOAT"),
+        ("f64", "DT_DOUBLE"),
+    ]
+]
+
+relu_kernel = """
+func @relu(%arg0: tensor<?xf99>) -> tensor<?xf99> {
+  %0 = "tf.Relu"(%arg0) { T = "tfdtype$DT_TYPE" }
+    : (tensor<?xf99>) -> tensor<?xf99>
+  return %0 : tensor<?xf99>
+}
+"""
+
+[
+    gen_kernel_image_hdr(
+        name = "relu_{type}_kernel".format(type = type),
+        op = relu_kernel.replace("f99", type).replace("DT_TYPE", dtype),
+        same_shape = "0,1",
+        tile_size = "256",
+    )
+    for (type, dtype) in [
+        ("f16", "DT_HALF"),
+        ("f32", "DT_FLOAT"),
+        ("f64", "DT_DOUBLE"),
+    ]
+]
--- a/tensorflow/core/kernels/cubin_headers/build_defs.bzl
+++ b/tensorflow/core/kernels/cubin_headers/build_defs.bzl
@@ -1,6 +1,6 @@
 """Generates cubin headers for TF dialect ops."""

-load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures", "if_cuda")
+load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures")

 def _lookup_file(filegroup, path):
    """Extracts file at (relative) path in filegroup."""
@@ -87,8 +87,8 @@ _gen_kernel_image_hdr = rule(

 def gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
    """Generates a C header with fatbin data from a Tensorflow op."""
-    if_cuda(
-        if_true = [_gen_kernel_image_hdr(
+    if cuda_gpu_architectures():
+        _gen_kernel_image_hdr(
            name = name,
            op = op,
            tile_size = tile_size,
@@ -97,5 +97,4 @@ def gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
            symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
            gpu_archs = cuda_gpu_architectures(),
            tags = tags,
-        )],
-    )
+        )