Generate cubin headers for bias_add and relu.
Also, instead of checking if_cuda, check whether cuda_gpu_architectures() is non-empty.

PiperOrigin-RevId: 311521784
Change-Id: I6a1a7e9cefc8e845e69d62fb3c19d9976b0f2196
This commit is contained in:
parent
a04c8be3e7
commit
015197cf8b
|
@ -0,0 +1,47 @@
|
|||
# Generates headers containing cubin for CUDA kernels.
|
||||
load("//tensorflow/core/kernels/cubin_headers:build_defs.bzl", "gen_kernel_image_hdr")
|
||||
|
||||
# MLIR template for the BiasAdd kernel. "f99" is a placeholder for the element
# type and "DT_TYPE" for the matching TF dtype name; both are substituted via
# str.replace() when the per-type targets are instantiated below.
bias_add_kernel = """
func @bias_add(%arg0: tensor<?x?xf99>,
%arg1: tensor<?xf99>) -> tensor<?x?xf99> {
%0 = "tf.BiasAdd"(%arg0, %arg1) { T = "tfdtype$DT_TYPE" }
: (tensor<?x?xf99>, tensor<?xf99>) -> tensor<?x?xf99>
return %0 : tensor<?x?xf99>
}
"""

# Instantiate one gen_kernel_image_hdr target per (element type, dtype) pair.
[
    gen_kernel_image_hdr(
        name = "bias_add_{type}_kernel".format(type = elem_type),
        op = bias_add_kernel.replace("f99", elem_type).replace("DT_TYPE", tf_dtype),
        tile_size = "16x16",
        same_shape = "0,2",
    )
    for elem_type, tf_dtype in {
        "f16": "DT_HALF",
        "f32": "DT_FLOAT",
        "f64": "DT_DOUBLE",
    }.items()
]
|
||||
|
||||
# MLIR template for the Relu kernel. "f99" is a placeholder for the element
# type and "DT_TYPE" for the matching TF dtype name; both are substituted via
# str.replace() when the per-type targets are instantiated below.
relu_kernel = """
func @relu(%arg0: tensor<?xf99>) -> tensor<?xf99> {
%0 = "tf.Relu"(%arg0) { T = "tfdtype$DT_TYPE" }
: (tensor<?xf99>) -> tensor<?xf99>
return %0 : tensor<?xf99>
}
"""

# Instantiate one gen_kernel_image_hdr target per (element type, dtype) pair.
[
    gen_kernel_image_hdr(
        name = "relu_{type}_kernel".format(type = elem_type),
        op = relu_kernel.replace("f99", elem_type).replace("DT_TYPE", tf_dtype),
        tile_size = "256",
        same_shape = "0,1",
    )
    for elem_type, tf_dtype in {
        "f16": "DT_HALF",
        "f32": "DT_FLOAT",
        "f64": "DT_DOUBLE",
    }.items()
]
|
|
@ -1,6 +1,6 @@
|
|||
"""Generates cubin headers for TF dialect ops."""
|
||||
|
||||
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures", "if_cuda")
|
||||
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures")
|
||||
|
||||
def _lookup_file(filegroup, path):
|
||||
"""Extracts file at (relative) path in filegroup."""
|
||||
|
@ -87,8 +87,8 @@ _gen_kernel_image_hdr = rule(
|
|||
|
||||
def gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
|
||||
"""Generates a C header with fatbin data from a Tensorflow op."""
|
||||
if_cuda(
|
||||
if_true = [_gen_kernel_image_hdr(
|
||||
if cuda_gpu_architectures():
|
||||
_gen_kernel_image_hdr(
|
||||
name = name,
|
||||
op = op,
|
||||
tile_size = tile_size,
|
||||
|
@ -97,5 +97,4 @@ def gen_kernel_image_hdr(name, op, tile_size, tags = [], same_shape = None):
|
|||
symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
|
||||
gpu_archs = cuda_gpu_architectures(),
|
||||
tags = tags,
|
||||
)],
|
||||
)
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue