Split up conv_2d kernels even further

conv_2d_gpu_int.cu.cc takes a really long time to compile for GPU. This is causing very slow builds for some users.

By splitting the file up even further, we can reduce the overall build time by allowing more parallelism. In my (somewhat limited) measurements, this reduces the time on the critical path from around 3.5 minutes to 2.5 minutes.

PiperOrigin-RevId: 293536410
Change-Id: Ieeb9fe42b0a35adf98ec5034776b1452648fcf60
This commit is contained in:
James Keeling 2020-02-06 00:31:44 -08:00 committed by TensorFlower Gardener
parent b755ddd51e
commit 04f4972685
4 changed files with 78 additions and 6 deletions

View File

@@ -308,6 +308,8 @@ tf_kernel_library(
"conv_2d_gpu_float.cu.cc",
"conv_2d_gpu_half.cu.cc",
"conv_2d_gpu_int.cu.cc",
"conv_2d_gpu_int_spatial_convolution.cu.cc",
"conv_2d_gpu_int_spatial_convolution_backward.cu.cc",
"conv_2d_gpu_uint16.cu.cc",
"conv_2d_gpu_uint32.cu.cc",
"conv_2d_gpu_uint64.cu.cc",

View File

@@ -29,16 +29,10 @@ namespace tensorflow {
namespace functor {
// For 2d ops.
template struct SpatialConvolution<Eigen::GpuDevice, int32>;
template struct MatMulConvFunctor<Eigen::GpuDevice, int32>;
template struct TransformFilter<Eigen::GpuDevice, int32, int, 4>;
template struct PadInput<Eigen::GpuDevice, int32, int, 4>;
template struct SpatialConvolutionBackwardInputFunc<Eigen::GpuDevice, int32>;
template struct SpatialConvolutionBackwardInputWithExplicitPaddingFunc<
Eigen::GpuDevice, int32>;
} // namespace functor
} // namespace tensorflow

View File

@@ -0,0 +1,37 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU
#include <algorithm>
#include <array>
#include <limits>
#include <utility>
#include "tensorflow/core/kernels/conv_2d.h"
#include "tensorflow/core/kernels/conv_2d_gpu.h"
// Explicit instantiation of the forward 2-D convolution functor for the GPU
// device and the int32 element type.
namespace tensorflow {
namespace functor {
// Forces the compiler to emit SpatialConvolution<Eigen::GpuDevice, int32> in
// this translation unit. The instantiation is kept in a file of its own
// because compiling all int32 conv_2d GPU instantiations in a single file was
// slow; separate files let the build compile them in parallel.
// NOTE(review): the template definition presumably comes from conv_2d.h /
// conv_2d_gpu.h included above -- confirm.
template struct SpatialConvolution<Eigen::GpuDevice, int32>;
} // namespace functor
} // namespace tensorflow
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

View File

@@ -0,0 +1,39 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU
#include <algorithm>
#include <array>
#include <limits>
#include <utility>
#include "tensorflow/core/kernels/conv_2d.h"
#include "tensorflow/core/kernels/conv_2d_gpu.h"
// Explicit instantiations of the backward-input 2-D convolution functors for
// the GPU device and the int32 element type.
namespace tensorflow {
namespace functor {
// Forces the compiler to emit both backward-input functors (the plain one and
// the explicit-padding variant) for Eigen::GpuDevice / int32 in this
// translation unit. They are kept in a file of their own because compiling
// all int32 conv_2d GPU instantiations in a single file was slow; separate
// files let the build compile them in parallel.
// NOTE(review): the template definitions presumably come from conv_2d.h /
// conv_2d_gpu.h included above -- confirm.
template struct SpatialConvolutionBackwardInputFunc<Eigen::GpuDevice, int32>;
template struct SpatialConvolutionBackwardInputWithExplicitPaddingFunc<
Eigen::GpuDevice, int32>;
} // namespace functor
} // namespace tensorflow
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM