Split tile functor GPU code into multiple files
The original single source file was a bottleneck during compilation, often taking many minutes to compile. In local testing, this change reduces the wall-clock build time for the tile functor GPU kernels from 217s to 71s.

PiperOrigin-RevId: 228296771
This commit is contained in:
parent
b504d86ca2
commit
84cf1da2b9
tensorflow/core/kernels
@ -1012,7 +1012,16 @@ tf_kernel_library(
|
||||
hdrs = ["tile_functor.h"],
|
||||
gpu_srcs = [
|
||||
"tile_functor.h",
|
||||
"tile_functor_gpu.cu.cc",
|
||||
"tile_functor_gpu.h",
|
||||
"tile_functor_gpu_bool.cu.cc",
|
||||
"tile_functor_gpu_complex64.cu.cc",
|
||||
"tile_functor_gpu_complex128.cu.cc",
|
||||
"tile_functor_gpu_double.cu.cc",
|
||||
"tile_functor_gpu_float.cu.cc",
|
||||
"tile_functor_gpu_half.cu.cc",
|
||||
"tile_functor_gpu_int16.cu.cc",
|
||||
"tile_functor_gpu_int32.cu.cc",
|
||||
"tile_functor_gpu_int64.cu.cc",
|
||||
],
|
||||
prefix = "tile_ops",
|
||||
deps = ARRAY_DEPS,
|
||||
|
@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_KERNELS_TILE_FUNCTOR_GPU_H_
|
||||
#define TENSORFLOW_CORE_KERNELS_TILE_FUNCTOR_GPU_H_
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
|
||||
#define EIGEN_USE_GPU
|
||||
@ -80,28 +83,7 @@ void TileSimple(const Device& d, Tensor* out, const Tensor& in) {
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
namespace functor {
|
||||
|
||||
typedef Eigen::GpuDevice GPUDevice;
|
||||
|
||||
// Register functors used for Tile functor.
|
||||
#define DEFINE_TYPE(T) \
|
||||
template struct Tile<GPUDevice, T, int32>; \
|
||||
template struct Tile<GPUDevice, T, int64>;
|
||||
|
||||
TF_CALL_bool(DEFINE_TYPE);
|
||||
TF_CALL_int16(DEFINE_TYPE);
|
||||
TF_CALL_int32(DEFINE_TYPE);
|
||||
TF_CALL_int64(DEFINE_TYPE);
|
||||
TF_CALL_float(DEFINE_TYPE);
|
||||
TF_CALL_double(DEFINE_TYPE);
|
||||
TF_CALL_half(DEFINE_TYPE);
|
||||
TF_CALL_complex64(DEFINE_TYPE);
|
||||
TF_CALL_complex128(DEFINE_TYPE);
|
||||
|
||||
#undef DEFINE_TYPE
|
||||
|
||||
} // end namespace functor
|
||||
} // namespace tensorflow
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
#endif // TENSORFLOW_CORE_KERNELS_TILE_FUNCTOR_GPU_H_
|
31
tensorflow/core/kernels/tile_functor_gpu_bool.cu.cc
Normal file
31
tensorflow/core/kernels/tile_functor_gpu_bool.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/tile_functor.h"
|
||||
#include "tensorflow/core/kernels/tile_functor_gpu.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace functor {
|
||||
using Eigen::GpuDevice;
|
||||
|
||||
// Explicit instantiation definitions of the Tile functor on GpuDevice for
// bool elements, once with int32 and once with int64 as the third template
// argument. This file holds only the bool instantiations.
template struct Tile<GpuDevice, bool, int32>;
|
||||
template struct Tile<GpuDevice, bool, int64>;
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
31
tensorflow/core/kernels/tile_functor_gpu_complex128.cu.cc
Normal file
31
tensorflow/core/kernels/tile_functor_gpu_complex128.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/tile_functor.h"
|
||||
#include "tensorflow/core/kernels/tile_functor_gpu.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace functor {
|
||||
using Eigen::GpuDevice;
|
||||
|
||||
// Explicit instantiation definitions of the Tile functor on GpuDevice for
// complex128 elements, once with int32 and once with int64 as the third
// template argument. This file holds only the complex128 instantiations.
template struct Tile<GpuDevice, complex128, int32>;
|
||||
template struct Tile<GpuDevice, complex128, int64>;
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
31
tensorflow/core/kernels/tile_functor_gpu_complex64.cu.cc
Normal file
31
tensorflow/core/kernels/tile_functor_gpu_complex64.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/tile_functor.h"
|
||||
#include "tensorflow/core/kernels/tile_functor_gpu.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace functor {
|
||||
using Eigen::GpuDevice;
|
||||
|
||||
// Explicit instantiation definitions of the Tile functor on GpuDevice for
// complex64 elements, once with int32 and once with int64 as the third
// template argument. This file holds only the complex64 instantiations.
template struct Tile<GpuDevice, complex64, int32>;
|
||||
template struct Tile<GpuDevice, complex64, int64>;
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
31
tensorflow/core/kernels/tile_functor_gpu_double.cu.cc
Normal file
31
tensorflow/core/kernels/tile_functor_gpu_double.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/tile_functor.h"
|
||||
#include "tensorflow/core/kernels/tile_functor_gpu.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace functor {
|
||||
using Eigen::GpuDevice;
|
||||
|
||||
// Explicit instantiation definitions of the Tile functor on GpuDevice for
// double elements, once with int32 and once with int64 as the third template
// argument. This file holds only the double instantiations.
template struct Tile<GpuDevice, double, int32>;
|
||||
template struct Tile<GpuDevice, double, int64>;
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
31
tensorflow/core/kernels/tile_functor_gpu_float.cu.cc
Normal file
31
tensorflow/core/kernels/tile_functor_gpu_float.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/tile_functor.h"
|
||||
#include "tensorflow/core/kernels/tile_functor_gpu.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace functor {
|
||||
using Eigen::GpuDevice;
|
||||
|
||||
// Explicit instantiation definitions of the Tile functor on GpuDevice for
// float elements, once with int32 and once with int64 as the third template
// argument. This file holds only the float instantiations.
template struct Tile<GpuDevice, float, int32>;
|
||||
template struct Tile<GpuDevice, float, int64>;
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
31
tensorflow/core/kernels/tile_functor_gpu_half.cu.cc
Normal file
31
tensorflow/core/kernels/tile_functor_gpu_half.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/tile_functor.h"
|
||||
#include "tensorflow/core/kernels/tile_functor_gpu.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace functor {
|
||||
using Eigen::GpuDevice;
|
||||
|
||||
// Explicit instantiation definitions of the Tile functor on GpuDevice for
// Eigen::half elements, once with int32 and once with int64 as the third
// template argument. This file holds only the half instantiations.
template struct Tile<GpuDevice, Eigen::half, int32>;
|
||||
template struct Tile<GpuDevice, Eigen::half, int64>;
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
31
tensorflow/core/kernels/tile_functor_gpu_int16.cu.cc
Normal file
31
tensorflow/core/kernels/tile_functor_gpu_int16.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/tile_functor.h"
|
||||
#include "tensorflow/core/kernels/tile_functor_gpu.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace functor {
|
||||
using Eigen::GpuDevice;
|
||||
|
||||
// Explicit instantiation definitions of the Tile functor on GpuDevice for
// int16 elements, once with int32 and once with int64 as the third template
// argument. This file holds only the int16 instantiations.
template struct Tile<GpuDevice, int16, int32>;
|
||||
template struct Tile<GpuDevice, int16, int64>;
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
31
tensorflow/core/kernels/tile_functor_gpu_int32.cu.cc
Normal file
31
tensorflow/core/kernels/tile_functor_gpu_int32.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/tile_functor.h"
|
||||
#include "tensorflow/core/kernels/tile_functor_gpu.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace functor {
|
||||
using Eigen::GpuDevice;
|
||||
|
||||
// Explicit instantiation definitions of the Tile functor on GpuDevice for
// int32 elements, once with int32 and once with int64 as the third template
// argument. This file holds only the int32 instantiations.
template struct Tile<GpuDevice, int32, int32>;
|
||||
template struct Tile<GpuDevice, int32, int64>;
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
31
tensorflow/core/kernels/tile_functor_gpu_int64.cu.cc
Normal file
31
tensorflow/core/kernels/tile_functor_gpu_int64.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/tile_functor.h"
|
||||
#include "tensorflow/core/kernels/tile_functor_gpu.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace functor {
|
||||
using Eigen::GpuDevice;
|
||||
|
||||
// Explicit instantiation definitions of the Tile functor on GpuDevice for
// int64 elements, once with int32 and once with int64 as the third template
// argument. This file holds only the int64 instantiations.
template struct Tile<GpuDevice, int64, int32>;
|
||||
template struct Tile<GpuDevice, int64, int64>;
|
||||
} // namespace functor
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
Loading…
Reference in New Issue
Block a user