Only build the MLIR generated GPU kernels when needed.
To speed up CPU builds that still have a GPU configured, we put the deps and srcs of cwise_unary_op behind if_cuda_or_rocm. Also remove the "manual" tag; it should no longer be necessary. PiperOrigin-RevId: 332803016 Change-Id: I9c513f915b42468413aae0423fa0238350326e39
This commit is contained in:
parent
4d81db608e
commit
5558963f09
@ -6,10 +6,10 @@ load(
|
||||
"if_mlir_generated_gpu_kernels_enabled",
|
||||
)
|
||||
load(
|
||||
"//tensorflow/stream_executor:build_defs.bzl",
|
||||
"if_gpu_is_configured",
|
||||
"//tensorflow:tensorflow.bzl",
|
||||
"if_cuda_or_rocm",
|
||||
"tf_kernel_library",
|
||||
)
|
||||
load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
|
||||
load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test")
|
||||
load(
|
||||
"//tensorflow/core/platform:build_config_root.bzl",
|
||||
@ -30,24 +30,25 @@ config_setting(
|
||||
|
||||
tf_kernel_library(
|
||||
name = "cwise_unary_op",
|
||||
srcs = if_gpu_is_configured([
|
||||
# Technically these source files don't need --config=cuda or --config=rocm,
|
||||
# but we want to avoid building them if they are not needed.
|
||||
srcs = if_cuda_or_rocm([
|
||||
"cwise_op_gpu_abs.cc",
|
||||
"cwise_op_gpu_base.cc",
|
||||
"cwise_op_gpu_base.h",
|
||||
"cwise_op_gpu_tanh.cc",
|
||||
]),
|
||||
tags = ["manual"],
|
||||
deps = [
|
||||
deps = if_cuda_or_rocm([
|
||||
":abs_kernels",
|
||||
":tanh_kernels",
|
||||
"//tensorflow/core:framework",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:stream_executor",
|
||||
"//third_party/eigen3",
|
||||
"@com_google_absl//absl/strings",
|
||||
"@com_google_absl//absl/synchronization",
|
||||
"@com_google_absl//absl/types:span",
|
||||
],
|
||||
"//third_party/eigen3",
|
||||
"//tensorflow/core:framework",
|
||||
"//tensorflow/core:lib",
|
||||
"//tensorflow/core:stream_executor",
|
||||
]),
|
||||
)
|
||||
|
||||
tf_cuda_cc_test(
|
||||
|
Loading…
Reference in New Issue
Block a user