Adding ROCm support for reduction ops

Deven Desai 2019-06-25 15:26:27 +00:00
parent 28d194b368
commit 69a449a49f
16 changed files with 34 additions and 22 deletions
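
The change follows one pattern across the sixteen files: guards of the form #if GOOGLE_CUDA around GPU kernel registrations and functor instantiations are widened to #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM, so the reduction ops build for ROCm as well as CUDA, while the complex64/complex128 paths are placed behind a CUDA-only guard. As a minimal, self-contained sketch of what the widened guard does (the function below is a placeholder, not a TensorFlow API):

// Illustrative only: the GPU path is compiled for either backend,
// i.e. when building with -DGOOGLE_CUDA=1 or -DTENSORFLOW_USE_ROCM=1.
#include <cstdio>

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
void RegisterReductionKernels() { std::puts("GPU reduction kernels registered"); }
#else
void RegisterReductionKernels() { std::puts("CPU-only build: no GPU kernels"); }
#endif

int main() { RegisterReductionKernels(); }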

@@ -117,6 +117,10 @@ struct Identity {
FIX_MEAN_IDENTITY(Eigen::half)
FIX_MEAN_IDENTITY(float)
FIX_MEAN_IDENTITY(double)
#if GOOGLE_CUDA
FIX_MEAN_IDENTITY(complex64)
FIX_MEAN_IDENTITY(complex128)
#endif
#undef FIX_MEAN_IDENTITY
template <typename Device, typename OUT_T, typename Reducer>
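
With this hunk, the complex64 and complex128 mean-identity fix-ups are compiled only for CUDA builds; the registration files later in this commit likewise keep their complex-typed kernels CUDA-only. A minimal sketch of that pattern, using a hypothetical HANDLE_TYPE macro rather than the TensorFlow ones:

// Illustrative only: an X-macro expanded once per supported type, with the
// complex expansions kept behind the CUDA-only guard as in the hunk above.
#include <complex>
#include <iostream>

#define HANDLE_TYPE(T) std::cout << "handling " << #T << "\n";

void HandleAllTypes() {
  HANDLE_TYPE(float)
  HANDLE_TYPE(double)
#if GOOGLE_CUDA
  HANDLE_TYPE(std::complex<float>)
  HANDLE_TYPE(std::complex<double>)
#endif
}
#undef HANDLE_TYPE

int main() { HandleAllTypes(); }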

@@ -30,7 +30,7 @@ REGISTER_KERNEL_BUILDER(
.HostMemory("reduction_indices"),
ReductionOp<CPUDevice, bool, int64, Eigen::internal::AndReducer>);
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
REGISTER_KERNEL_BUILDER(
Name("All")
.TypeConstraint<int32>("Tidx")

@@ -30,7 +30,7 @@ REGISTER_KERNEL_BUILDER(
.HostMemory("reduction_indices"),
ReductionOp<CPUDevice, bool, int64, Eigen::internal::OrReducer>);
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
REGISTER_KERNEL_BUILDER(
Name("Any")
.TypeConstraint<int32>("Tidx")

@@ -15,8 +15,8 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_GPU_H_
#define TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_GPU_H_
#if !GOOGLE_CUDA
#error This file must only be included when building with Cuda support
#if !GOOGLE_CUDA && !TENSORFLOW_USE_ROCM
#error This file must only be included when building with GPU support
#endif
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

@@ -33,7 +33,7 @@ namespace tensorflow {
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define REGISTER_GPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER(Name("EuclideanNorm") \
@@ -51,8 +51,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
ReductionOp<GPUDevice, type, int64, \
functor::EuclideanNormReducer<type>>);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
#if GOOGLE_CUDA
TF_CALL_complex64(REGISTER_GPU_KERNELS);
TF_CALL_complex128(REGISTER_GPU_KERNELS);
#endif
#undef REGISTER_GPU_KERNELS
#endif
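
The split above, TF_CALL_GPU_NUMBER_TYPES registered for both backends but TF_CALL_complex64/TF_CALL_complex128 kept under #if GOOGLE_CUDA, recurs unchanged in the Mean, Prod, and Sum hunks further down.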

@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU
@@ -59,4 +59,4 @@ DEFINE_FOR_TYPE_AND_R(bool, Eigen::internal::OrReducer);
} // end namespace functor
} // end namespace tensorflow
#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU
@@ -67,4 +67,4 @@ DEFINE_FOR_ALL_REDUCERS(double);
} // end namespace functor
} // end namespace tensorflow
#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU
@@ -67,4 +67,4 @@ DEFINE_FOR_ALL_REDUCERS(float);
} // end namespace functor
} // end namespace tensorflow
#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU
@@ -68,4 +68,4 @@ DEFINE_FOR_ALL_REDUCERS(int64);
} // end namespace functor
} // end namespace tensorflow
#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU
@@ -64,4 +64,4 @@ DEFINE_FOR_ALL_REDUCERS(Eigen::half);
} // end namespace functor
} // end namespace tensorflow
#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU
@@ -64,4 +64,4 @@ DEFINE_FOR_ALL_REDUCERS(Eigen::half);
} // end namespace functor
} // end namespace tensorflow
#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
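
Each of the per-type .cu.cc hunks above makes the same two edits: the guard around EIGEN_USE_GPU and the explicit functor instantiations is widened to both backends, and the comment on the closing #endif is updated to match. A rough stand-alone sketch of that file shape, with a placeholder functor instead of what DEFINE_FOR_ALL_REDUCERS expands to in TensorFlow:

// Illustrative only: explicit instantiations that exist solely in GPU builds.
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU  // in the real files this enables Eigen's GPU code paths

template <typename T>
struct SumToScalar {  // placeholder functor, not the TensorFlow one
  T operator()(const T* data, int n) const {
    T acc = T(0);
    for (int i = 0; i < n; ++i) acc += data[i];
    return acc;
  }
};

// The real code uses one translation unit per element type; two types shown here.
template struct SumToScalar<float>;
template struct SumToScalar<double>;

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

int main() {}  // not part of the pattern; only here so the sketch links stand-alone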

@@ -33,7 +33,7 @@ namespace tensorflow {
TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define REGISTER_GPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER( \

@@ -33,7 +33,7 @@ namespace tensorflow {
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define REGISTER_GPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER( \
@@ -51,8 +51,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
.HostMemory("reduction_indices"), \
ReductionOp<GPUDevice, type, int64, functor::MeanReducer<type>>);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
#if GOOGLE_CUDA
TF_CALL_complex64(REGISTER_GPU_KERNELS);
TF_CALL_complex128(REGISTER_GPU_KERNELS);
#endif
#undef REGISTER_GPU_KERNELS
#endif

@@ -33,7 +33,7 @@ namespace tensorflow {
TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define REGISTER_GPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER( \

@@ -33,7 +33,7 @@ namespace tensorflow {
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define REGISTER_GPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER(Name("Prod") \
@@ -52,8 +52,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
Eigen::internal::ProdReducer<type>>);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
TF_CALL_int32(REGISTER_GPU_KERNELS);
#if GOOGLE_CUDA
TF_CALL_complex64(REGISTER_GPU_KERNELS);
TF_CALL_complex128(REGISTER_GPU_KERNELS);
#endif
#undef REGISTER_GPU_KERNELS
#endif

@@ -33,7 +33,7 @@ namespace tensorflow {
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define REGISTER_GPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER( \
@@ -52,8 +52,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
ReductionOp<GPUDevice, type, int64, Eigen::internal::SumReducer<type>>);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
TF_CALL_int64(REGISTER_GPU_KERNELS);
#if GOOGLE_CUDA
TF_CALL_complex64(REGISTER_GPU_KERNELS);
TF_CALL_complex128(REGISTER_GPU_KERNELS);
#endif
#undef REGISTER_GPU_KERNELS
// A special GPU kernel for int32.
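
The trailing comment is context from the original file: TensorFlow also registers a Sum kernel for int32 on GPU devices whose inputs and outputs live in host memory (int32 data such as shapes and indices is generally kept on the host), and that registration, not shown in this hunk, presumably sits inside the same newly widened guard.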