Adding ROCm support for reduction ops
commit 69a449a49f (parent 28d194b368)

Changed files (tensorflow/core/kernels):
  reduction_ops.h
  reduction_ops_all.cc
  reduction_ops_any.cc
  reduction_ops_common_gpu.h
  reduction_ops_euclidean.cc
  reduction_ops_gpu_bool.cu.cc
  reduction_ops_gpu_double.cu.cc
  reduction_ops_gpu_float.cu.cc
  reduction_ops_gpu_int.cu.cc
  reduction_ops_half_mean_sum.cu.cc
  reduction_ops_half_prod_max_min.cu.cc
  reduction_ops_max.cc
  reduction_ops_mean.cc
  reduction_ops_min.cc
  reduction_ops_prod.cc
  reduction_ops_sum.cc
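Every change below follows the same mechanical pattern: preprocessor guards that previously keyed GPU-only code off GOOGLE_CUDA now also accept TENSORFLOW_USE_ROCM, so the same kernel registrations and functor instantiations get compiled for AMD GPUs under the ROCm toolchain. As a rough sketch of the resulting shape of a guarded GPU registration (the op name "SomeReduction" and the chosen reducer are illustrative placeholders, not part of this commit):

    // Sketch only: the guard pattern this commit applies throughout.
    // "SomeReduction" and SumReducer here are illustrative placeholders.
    #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM  // compiled for CUDA *or* ROCm builds
    #define REGISTER_GPU_KERNELS(type)                                  \
      REGISTER_KERNEL_BUILDER(Name("SomeReduction")                     \
                                  .Device(DEVICE_GPU)                   \
                                  .TypeConstraint<type>("T")            \
                                  .TypeConstraint<int32>("Tidx")        \
                                  .HostMemory("reduction_indices"),     \
                              ReductionOp<GPUDevice, type, int32,       \
                                          Eigen::internal::SumReducer<type>>);
    TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
    #undef REGISTER_GPU_KERNELS
    #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM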
tensorflow/core/kernels/reduction_ops.h
@@ -117,6 +117,10 @@ struct Identity {
 FIX_MEAN_IDENTITY(Eigen::half)
 FIX_MEAN_IDENTITY(float)
 FIX_MEAN_IDENTITY(double)
+#if GOOGLE_CUDA
+FIX_MEAN_IDENTITY(complex64)
+FIX_MEAN_IDENTITY(complex128)
+#endif
 #undef FIX_MEAN_IDENTITY

 template <typename Device, typename OUT_T, typename Reducer>
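For context, FIX_MEAN_IDENTITY specializes the Identity helper so that the mean of an empty reduction produces NaN instead of the reducer's default initial value; the new #if GOOGLE_CUDA block keeps the complex64/complex128 specializations CUDA-only, matching the complex kernel registrations further down, which are guarded the same way. The macro body is defined earlier in reduction_ops.h and is not part of this hunk; a hedged sketch of the kind of specialization it expands to:

    // Sketch of what a FIX_MEAN_IDENTITY(T)-style specialization generates;
    // the real macro lives earlier in reduction_ops.h and may differ in detail.
    template <>
    struct Identity<functor::MeanReducer<float>> {
      static float identity(const functor::MeanReducer<float>&) {
        // The mean of zero elements is undefined, so report NaN.
        return Eigen::NumTraits<float>::quiet_NaN();
      }
    };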
tensorflow/core/kernels/reduction_ops_all.cc
@@ -30,7 +30,7 @@ REGISTER_KERNEL_BUILDER(
         .HostMemory("reduction_indices"),
     ReductionOp<CPUDevice, bool, int64, Eigen::internal::AndReducer>);

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER_KERNEL_BUILDER(
     Name("All")
         .TypeConstraint<int32>("Tidx")
tensorflow/core/kernels/reduction_ops_any.cc
@@ -30,7 +30,7 @@ REGISTER_KERNEL_BUILDER(
         .HostMemory("reduction_indices"),
     ReductionOp<CPUDevice, bool, int64, Eigen::internal::OrReducer>);

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER_KERNEL_BUILDER(
     Name("Any")
         .TypeConstraint<int32>("Tidx")
tensorflow/core/kernels/reduction_ops_common_gpu.h
@@ -15,8 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_GPU_H_
 #define TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_GPU_H_

-#if !GOOGLE_CUDA
-#error This file must only be included when building with Cuda support
+#if !GOOGLE_CUDA && !TENSORFLOW_USE_ROCM
+#error This file must only be included when building with GPU support
 #endif

 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
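With this gate, the header can be pulled in from either GPU build, and the #error only fires when neither macro is defined. A minimal standalone illustration of the same gate (outside TensorFlow; compile with -DGOOGLE_CUDA=1 or -DTENSORFLOW_USE_ROCM=1 to pass, with neither to hit the error):

    // Undefined macros evaluate to 0 inside #if, so a plain host build
    // (neither flag defined) trips the #error below.
    #if !GOOGLE_CUDA && !TENSORFLOW_USE_ROCM
    #error This file must only be included when building with GPU support
    #endif

    int main() { return 0; }  // reached only in a GPU-enabled configuration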
tensorflow/core/kernels/reduction_ops_euclidean.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)                          \
   REGISTER_KERNEL_BUILDER(Name("EuclideanNorm")             \
@@ -51,8 +51,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
                           ReductionOp<GPUDevice, type, int64,              \
                                       functor::EuclideanNormReducer<type>>);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
+#if GOOGLE_CUDA
 TF_CALL_complex64(REGISTER_GPU_KERNELS);
 TF_CALL_complex128(REGISTER_GPU_KERNELS);
+#endif
 #undef REGISTER_GPU_KERNELS

 #endif
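The EuclideanNorm GPU registrations, and the Mean, Prod, and Sum registrations later in this commit, all end up with the same nesting: the outer guard opens the block for any GPU build, while an inner #if GOOGLE_CUDA keeps the complex64/complex128 instantiations off the ROCm path. Schematically:

    // Guard nesting used by the EuclideanNorm/Mean/Prod/Sum registrations.
    #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM            // any GPU build
    TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);   // half/float/double types
    #if GOOGLE_CUDA                                   // complex stays CUDA-only
    TF_CALL_complex64(REGISTER_GPU_KERNELS);
    TF_CALL_complex128(REGISTER_GPU_KERNELS);
    #endif
    #undef REGISTER_GPU_KERNELS
    #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM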
tensorflow/core/kernels/reduction_ops_gpu_bool.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -59,4 +59,4 @@ DEFINE_FOR_TYPE_AND_R(bool, Eigen::internal::OrReducer);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
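The reduction_ops_gpu_*.cu.cc translation units that follow are all cut from the same cloth: each defines EIGEN_USE_GPU and instantiates the GPU reduction functors for one element type via DEFINE_FOR_TYPE_AND_R / DEFINE_FOR_ALL_REDUCERS, and the only change in each is the pair of guards at the top and bottom of the file. A sketch of the file skeleton after this commit (the macro bodies are elided because they are not shown in these hunks):

    // Skeleton of a reduction_ops_gpu_*.cu.cc file after this change.
    #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

    #define EIGEN_USE_GPU

    namespace tensorflow {
    namespace functor {

    // ... DEFINE_FOR_TYPE_AND_R / DEFINE_FOR_ALL_REDUCERS instantiations,
    // e.g. DEFINE_FOR_TYPE_AND_R(bool, Eigen::internal::OrReducer); ...

    }  // end namespace functor
    }  // end namespace tensorflow

    #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM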
tensorflow/core/kernels/reduction_ops_gpu_double.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -67,4 +67,4 @@ DEFINE_FOR_ALL_REDUCERS(double);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/kernels/reduction_ops_gpu_float.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -67,4 +67,4 @@ DEFINE_FOR_ALL_REDUCERS(float);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/kernels/reduction_ops_gpu_int.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -68,4 +68,4 @@ DEFINE_FOR_ALL_REDUCERS(int64);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/kernels/reduction_ops_half_mean_sum.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -64,4 +64,4 @@ DEFINE_FOR_ALL_REDUCERS(Eigen::half);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/kernels/reduction_ops_half_prod_max_min.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -64,4 +64,4 @@ DEFINE_FOR_ALL_REDUCERS(Eigen::half);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/kernels/reduction_ops_max.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)          \
   REGISTER_KERNEL_BUILDER(                  \
tensorflow/core/kernels/reduction_ops_mean.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)          \
   REGISTER_KERNEL_BUILDER(                  \
@@ -51,8 +51,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
           .HostMemory("reduction_indices"),                               \
       ReductionOp<GPUDevice, type, int64, functor::MeanReducer<type>>);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
+#if GOOGLE_CUDA
 TF_CALL_complex64(REGISTER_GPU_KERNELS);
 TF_CALL_complex128(REGISTER_GPU_KERNELS);
+#endif
 #undef REGISTER_GPU_KERNELS

 #endif
tensorflow/core/kernels/reduction_ops_min.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)          \
   REGISTER_KERNEL_BUILDER(                  \
tensorflow/core/kernels/reduction_ops_prod.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)                      \
   REGISTER_KERNEL_BUILDER(Name("Prod")                  \
@@ -52,8 +52,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
                                       Eigen::internal::ProdReducer<type>>);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
 TF_CALL_int32(REGISTER_GPU_KERNELS);
+#if GOOGLE_CUDA
 TF_CALL_complex64(REGISTER_GPU_KERNELS);
 TF_CALL_complex128(REGISTER_GPU_KERNELS);
+#endif
 #undef REGISTER_GPU_KERNELS

 #endif
tensorflow/core/kernels/reduction_ops_sum.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)          \
   REGISTER_KERNEL_BUILDER(                  \
@@ -52,8 +52,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
       ReductionOp<GPUDevice, type, int64, Eigen::internal::SumReducer<type>>);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
 TF_CALL_int64(REGISTER_GPU_KERNELS);
+#if GOOGLE_CUDA
 TF_CALL_complex64(REGISTER_GPU_KERNELS);
 TF_CALL_complex128(REGISTER_GPU_KERNELS);
+#endif
 #undef REGISTER_GPU_KERNELS

 // A special GPU kernel for int32.
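The trailing context line refers to the int32 variant of Sum, which is registered on DEVICE_GPU but keeps its inputs and outputs in host memory so the reduction actually runs on the CPU; that registration sits just below this hunk and is not part of the diff. A hedged sketch of what such a host-memory registration typically looks like:

    // Sketch of a host-memory int32 Sum registration on the GPU device; the
    // real registration follows this hunk in reduction_ops_sum.cc and may differ.
    REGISTER_KERNEL_BUILDER(
        Name("Sum")
            .Device(DEVICE_GPU)
            .TypeConstraint<int32>("T")
            .TypeConstraint<int32>("Tidx")
            .HostMemory("input")
            .HostMemory("output")
            .HostMemory("reduction_indices"),
        ReductionOp<CPUDevice, int32, int32, Eigen::internal::SumReducer<int32>>);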