Split scan ops GPU code into multiple files

This file was a bottleneck during compilation, often taking many minutes to compile. In local testing, this change reduces the wall-clock build time for the scan ops GPU kernels from 107s to 96s.

PiperOrigin-RevId: 228304727
2019-01-08 03:28:39 -08:00 · 2019-01-08 03:28:39 -08:00 · 3436665db2
commit 3436665db2
parent 0ef4b19044
5 changed files with 107 additions and 12 deletions
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -3279,7 +3279,15 @@ tf_kernel_library(

 tf_kernel_library(
    name = "scan_ops",
-    prefix = "scan_ops",
+    srcs = ["scan_ops.cc"],
+    hdrs = ["scan_ops.h"],
+    gpu_srcs = [
+        "scan_ops.h",
+        "scan_ops_gpu.h",
+        "scan_ops_gpu_double.cu.cc",
+        "scan_ops_gpu_float.cu.cc",
+        "scan_ops_gpu_half.cu.cc",
+    ],
    deps = MATH_DEPS + if_cuda(["@cub_archive//:cub"]),
 )

--- a/tensorflow/core/kernels/scan_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/scan_ops_gpu.h
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

+#ifndef TENSORFLOW_CORE_KERNELS_SCAN_OPS_GPU_H_
+#define TENSORFLOW_CORE_KERNELS_SCAN_OPS_GPU_H_
+
 #if GOOGLE_CUDA

 #define EIGEN_USE_GPU
@@ -290,17 +293,8 @@ struct Scan<GPUDevice, Eigen::internal::ProdReducer<T>, T> {
 };

 }  // namespace functor
-
-#define DEFINE(REDUCER, T) template struct functor::Scan<GPUDevice, REDUCER, T>;
-
-#define DEFINE_FOR_ALL_REDUCERS(T)           \
-  DEFINE(Eigen::internal::SumReducer<T>, T); \
-  DEFINE(Eigen::internal::ProdReducer<T>, T);
-
-TF_CALL_GPU_NUMBER_TYPES(DEFINE_FOR_ALL_REDUCERS);
-#undef DEFINE_FOR_ALL_REDUCERS
-#undef DEFINE
-
 }  // end namespace tensorflow

 #endif  // GOOGLE_CUDA
+
+#endif  // TENSORFLOW_CORE_KERNELS_SCAN_OPS_GPU_H_
--- a/tensorflow/core/kernels/scan_ops_gpu_double.cu.cc
+++ b/tensorflow/core/kernels/scan_ops_gpu_double.cu.cc
@@ -0,0 +1,31 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/kernels/scan_ops.h"
+#include "tensorflow/core/kernels/scan_ops_gpu.h"
+
+namespace tensorflow {
+using Eigen::GpuDevice;
+template struct functor::Scan<GpuDevice, Eigen::internal::SumReducer<double>,
+                              double>;
+template struct functor::Scan<GpuDevice, Eigen::internal::ProdReducer<double>,
+                              double>;
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
--- a/tensorflow/core/kernels/scan_ops_gpu_float.cu.cc
+++ b/tensorflow/core/kernels/scan_ops_gpu_float.cu.cc
@@ -0,0 +1,31 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/kernels/scan_ops.h"
+#include "tensorflow/core/kernels/scan_ops_gpu.h"
+
+namespace tensorflow {
+using Eigen::GpuDevice;
+template struct functor::Scan<GpuDevice, Eigen::internal::SumReducer<float>,
+                              float>;
+template struct functor::Scan<GpuDevice, Eigen::internal::ProdReducer<float>,
+                              float>;
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
--- a/tensorflow/core/kernels/scan_ops_gpu_half.cu.cc
+++ b/tensorflow/core/kernels/scan_ops_gpu_half.cu.cc
@@ -0,0 +1,31 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/kernels/scan_ops.h"
+#include "tensorflow/core/kernels/scan_ops_gpu.h"
+
+namespace tensorflow {
+using Eigen::GpuDevice;
+template struct functor::Scan<
+    GpuDevice, Eigen::internal::SumReducer<Eigen::half>, Eigen::half>;
+template struct functor::Scan<
+    GpuDevice, Eigen::internal::ProdReducer<Eigen::half>, Eigen::half>;
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA