Split up conv_2d kernels even further

conv_2d_gpu_int.cu.cc takes a really long time to compile for GPU, which is causing very slow builds for some users. By splitting the file up even further (moving the int32 SpatialConvolution and SpatialConvolutionBackwardInput* instantiations into their own .cu.cc files), we can reduce the overall build time by allowing more parallelism.

In my (somewhat limited) measurements, this reduces the time on the critical path from around 3.5 minutes to 2.5 minutes.

PiperOrigin-RevId: 293536410
Change-Id: Ieeb9fe42b0a35adf98ec5034776b1452648fcf60
2020-02-06 00:31:44 -08:00 · 2020-02-06 00:31:44 -08:00 · 04f4972685
commit 04f4972685
parent b755ddd51e
4 changed files with 78 additions and 6 deletions
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -308,6 +308,8 @@ tf_kernel_library(
        "conv_2d_gpu_float.cu.cc",
        "conv_2d_gpu_half.cu.cc",
        "conv_2d_gpu_int.cu.cc",
+        "conv_2d_gpu_int_spatial_convolution.cu.cc",
+        "conv_2d_gpu_int_spatial_convolution_backward.cu.cc",
        "conv_2d_gpu_uint16.cu.cc",
        "conv_2d_gpu_uint32.cu.cc",
        "conv_2d_gpu_uint64.cu.cc",
--- a/tensorflow/core/kernels/conv_2d_gpu_int.cu.cc
+++ b/tensorflow/core/kernels/conv_2d_gpu_int.cu.cc
@@ -29,16 +29,10 @@ namespace tensorflow {

 namespace functor {

-// For 2d ops.
-template struct SpatialConvolution<Eigen::GpuDevice, int32>;
 template struct MatMulConvFunctor<Eigen::GpuDevice, int32>;
 template struct TransformFilter<Eigen::GpuDevice, int32, int, 4>;
 template struct PadInput<Eigen::GpuDevice, int32, int, 4>;

-template struct SpatialConvolutionBackwardInputFunc<Eigen::GpuDevice, int32>;
-template struct SpatialConvolutionBackwardInputWithExplicitPaddingFunc<
-    Eigen::GpuDevice, int32>;
-
 }  // namespace functor
 }  // namespace tensorflow

--- a/tensorflow/core/kernels/conv_2d_gpu_int_spatial_convolution.cu.cc
+++ b/tensorflow/core/kernels/conv_2d_gpu_int_spatial_convolution.cu.cc
@@ -0,0 +1,37 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+
+#define EIGEN_USE_GPU
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <utility>
+
+#include "tensorflow/core/kernels/conv_2d.h"
+#include "tensorflow/core/kernels/conv_2d_gpu.h"
+
+namespace tensorflow {
+
+namespace functor {
+
+template struct SpatialConvolution<Eigen::GpuDevice, int32>;
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
--- a/tensorflow/core/kernels/conv_2d_gpu_int_spatial_convolution_backward.cu.cc
+++ b/tensorflow/core/kernels/conv_2d_gpu_int_spatial_convolution_backward.cu.cc
@@ -0,0 +1,39 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+
+#define EIGEN_USE_GPU
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <utility>
+
+#include "tensorflow/core/kernels/conv_2d.h"
+#include "tensorflow/core/kernels/conv_2d_gpu.h"
+
+namespace tensorflow {
+
+namespace functor {
+
+template struct SpatialConvolutionBackwardInputFunc<Eigen::GpuDevice, int32>;
+template struct SpatialConvolutionBackwardInputWithExplicitPaddingFunc<
+    Eigen::GpuDevice, int32>;
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM