diff --git a/tensorflow/core/kernels/cubin_headers/BUILD b/tensorflow/core/kernels/cubin_headers/BUILD
new file mode 100644
index 00000000000..bb7995dd221
--- /dev/null
+++ b/tensorflow/core/kernels/cubin_headers/BUILD
@@ -0,0 +1,52 @@
+# Generates headers containing cubin for CUDA kernels.
+load("//tensorflow/core/kernels/cubin_headers:build_defs.bzl", "gen_kernel_image_hdr")
+
+# MLIR template for tf.BiasAdd. "f99" is a placeholder element type and
+# "DT_TYPE" a placeholder TF dtype; both are substituted per target below.
+# NOTE(review): tensor ranks/dynamic dims here are assumed - confirm.
+bias_add_kernel = """
+func @bias_add(%arg0: tensor<?x?xf99>,
+    %arg1: tensor<?xf99>) -> tensor<?x?xf99> {
+  %0 = "tf.BiasAdd"(%arg0, %arg1) { T = "tfdtype$DT_TYPE" }
+    : (tensor<?x?xf99>, tensor<?xf99>) -> tensor<?x?xf99>
+  return %0 : tensor<?x?xf99>
+}
+"""
+
+[
+    gen_kernel_image_hdr(
+        name = "bias_add_{type}_kernel".format(type = type),
+        op = bias_add_kernel.replace("f99", type).replace("DT_TYPE", dtype),
+        same_shape = "0,2",
+        tile_size = "16x16",
+    )
+    for (type, dtype) in [
+        ("f16", "DT_HALF"),
+        ("f32", "DT_FLOAT"),
+        ("f64", "DT_DOUBLE"),
+    ]
+]
+
+# MLIR template for tf.Relu; uses the same "f99"/"DT_TYPE" placeholders.
+# NOTE(review): tensor rank/dynamic dim here is assumed - confirm.
+relu_kernel = """
+func @relu(%arg0: tensor<?xf99>) -> tensor<?xf99> {
+  %0 = "tf.Relu"(%arg0) { T = "tfdtype$DT_TYPE" }
+    : (tensor<?xf99>) -> tensor<?xf99>
+  return %0 : tensor<?xf99>
+}
+"""
+
+[
+    gen_kernel_image_hdr(
+        name = "relu_{type}_kernel".format(type = type),
+        op = relu_kernel.replace("f99", type).replace("DT_TYPE", dtype),
+        same_shape = "0,1",
+        tile_size = "256",
+    )
+    for (type, dtype) in [
+        ("f16", "DT_HALF"),
+        ("f32", "DT_FLOAT"),
+        ("f64", "DT_DOUBLE"),
+    ]
+]
diff --git a/tensorflow/core/kernels/cubin_headers/build_defs.bzl b/tensorflow/core/kernels/cubin_headers/build_defs.bzl
index b09c515c883..14f47601f06 100644
--- a/tensorflow/core/kernels/cubin_headers/build_defs.bzl
+++ b/tensorflow/core/kernels/cubin_headers/build_defs.bzl
@@ -1,6 +1,6 @@
 """Generates cubin headers for TF dialect ops."""
 
-load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures", "if_cuda")
+load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures")
 
 def _lookup_file(filegroup, path):
     """Extracts file at (relative) path in filegroup."""
@@ -87,8 +87,10 @@ _gen_kernel_image_hdr = rule(
 
 def gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
     """Generates a C header with fatbin data from a Tensorflow op."""
-    if_cuda(
-        if_true = [_gen_kernel_image_hdr(
+    # Instantiate the rule only when cuda_gpu_architectures() returns a truthy
+    # (non-empty) value; otherwise this macro creates no target.
+    if cuda_gpu_architectures():
+        _gen_kernel_image_hdr(
             name = name,
             op = op,
             tile_size = tile_size,
@@ -97,5 +99,4 @@ def gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
             symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
             gpu_archs = cuda_gpu_architectures(),
             tags = tags,
-        )],
-    )
+        )