Split scan ops GPU code into multiple files
This file was a bottleneck during compilation, often taking many minutes to compile. In local testing, this change reduces the wall-clock build time for the scan ops GPU kernels from 107s to 96s.

PiperOrigin-RevId: 228304727
This commit is contained in:
parent
0ef4b19044
commit
3436665db2
@ -3279,7 +3279,15 @@ tf_kernel_library(
|
||||
|
||||
tf_kernel_library(
|
||||
name = "scan_ops",
|
||||
prefix = "scan_ops",
|
||||
srcs = ["scan_ops.cc"],
|
||||
hdrs = ["scan_ops.h"],
|
||||
gpu_srcs = [
|
||||
"scan_ops.h",
|
||||
"scan_ops_gpu.h",
|
||||
"scan_ops_gpu_double.cu.cc",
|
||||
"scan_ops_gpu_float.cu.cc",
|
||||
"scan_ops_gpu_half.cu.cc",
|
||||
],
|
||||
deps = MATH_DEPS + if_cuda(["@cub_archive//:cub"]),
|
||||
)
|
||||
|
||||
|
@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_KERNELS_SCAN_OPS_GPU_H_
|
||||
#define TENSORFLOW_CORE_KERNELS_SCAN_OPS_GPU_H_
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
|
||||
#define EIGEN_USE_GPU
|
||||
@ -290,17 +293,8 @@ struct Scan<GPUDevice, Eigen::internal::ProdReducer<T>, T> {
|
||||
};
|
||||
|
||||
} // namespace functor
|
||||
|
||||
#define DEFINE(REDUCER, T) template struct functor::Scan<GPUDevice, REDUCER, T>;
|
||||
|
||||
#define DEFINE_FOR_ALL_REDUCERS(T) \
|
||||
DEFINE(Eigen::internal::SumReducer<T>, T); \
|
||||
DEFINE(Eigen::internal::ProdReducer<T>, T);
|
||||
|
||||
TF_CALL_GPU_NUMBER_TYPES(DEFINE_FOR_ALL_REDUCERS);
|
||||
#undef DEFINE_FOR_ALL_REDUCERS
|
||||
#undef DEFINE
|
||||
|
||||
} // end namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
||||
|
||||
#endif // TENSORFLOW_CORE_KERNELS_SCAN_OPS_GPU_H_
|
31
tensorflow/core/kernels/scan_ops_gpu_double.cu.cc
Normal file
31
tensorflow/core/kernels/scan_ops_gpu_double.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/scan_ops.h"
|
||||
#include "tensorflow/core/kernels/scan_ops_gpu.h"
|
||||
|
||||
// Explicit instantiations of the GPU Scan functor for `double`.
//
// This translation unit contains no logic of its own: it only forces the
// compiler to emit code for functor::Scan with the element type fixed to
// double. Per the commit description, the per-type instantiations were split
// into separate files to shorten the build of the scan ops GPU kernels.
namespace tensorflow {
|
||||
// Bring the Eigen device type into scope so it can be named unqualified below.
using Eigen::GpuDevice;
|
||||
// Scan on the GPU device with Eigen's sum reducer, element type double.
template struct functor::Scan<GpuDevice, Eigen::internal::SumReducer<double>,
|
||||
double>;
|
||||
// Scan on the GPU device with Eigen's product reducer, element type double.
template struct functor::Scan<GpuDevice, Eigen::internal::ProdReducer<double>,
|
||||
double>;
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
31
tensorflow/core/kernels/scan_ops_gpu_float.cu.cc
Normal file
31
tensorflow/core/kernels/scan_ops_gpu_float.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/scan_ops.h"
|
||||
#include "tensorflow/core/kernels/scan_ops_gpu.h"
|
||||
|
||||
// Explicit instantiations of the GPU Scan functor for `float`.
//
// This translation unit contains no logic of its own: it only forces the
// compiler to emit code for functor::Scan with the element type fixed to
// float. Per the commit description, the per-type instantiations were split
// into separate files to shorten the build of the scan ops GPU kernels.
namespace tensorflow {
|
||||
// Bring the Eigen device type into scope so it can be named unqualified below.
using Eigen::GpuDevice;
|
||||
// Scan on the GPU device with Eigen's sum reducer, element type float.
template struct functor::Scan<GpuDevice, Eigen::internal::SumReducer<float>,
|
||||
float>;
|
||||
// Scan on the GPU device with Eigen's product reducer, element type float.
template struct functor::Scan<GpuDevice, Eigen::internal::ProdReducer<float>,
|
||||
float>;
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
31
tensorflow/core/kernels/scan_ops_gpu_half.cu.cc
Normal file
31
tensorflow/core/kernels/scan_ops_gpu_half.cu.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
|
||||
#define EIGEN_USE_GPU
|
||||
|
||||
#include "tensorflow/core/kernels/scan_ops.h"
|
||||
#include "tensorflow/core/kernels/scan_ops_gpu.h"
|
||||
|
||||
// Explicit instantiations of the GPU Scan functor for `Eigen::half`.
//
// This translation unit contains no logic of its own: it only forces the
// compiler to emit code for functor::Scan with the element type fixed to
// Eigen::half. Per the commit description, the per-type instantiations were
// split into separate files to shorten the build of the scan ops GPU kernels.
namespace tensorflow {
|
||||
// Bring the Eigen device type into scope so it can be named unqualified below.
using Eigen::GpuDevice;
|
||||
// Scan on the GPU device with Eigen's sum reducer, element type Eigen::half.
template struct functor::Scan<
|
||||
GpuDevice, Eigen::internal::SumReducer<Eigen::half>, Eigen::half>;
|
||||
// Scan on the GPU device with Eigen's product reducer, element type Eigen::half.
template struct functor::Scan<
|
||||
GpuDevice, Eigen::internal::ProdReducer<Eigen::half>, Eigen::half>;
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA
|
Loading…
Reference in New Issue
Block a user