Adding ROCm support for reduction ops
commit 69a449a49f (parent 28d194b368)

Changed files (tensorflow/core/kernels):
  reduction_ops.h
  reduction_ops_all.cc
  reduction_ops_any.cc
  reduction_ops_common_gpu.h
  reduction_ops_euclidean.cc
  reduction_ops_gpu_bool.cu.cc
  reduction_ops_gpu_double.cu.cc
  reduction_ops_gpu_float.cu.cc
  reduction_ops_gpu_int.cu.cc
  reduction_ops_half_mean_sum.cu.cc
  reduction_ops_half_prod_max_min.cu.cc
  reduction_ops_max.cc
  reduction_ops_mean.cc
  reduction_ops_min.cc
  reduction_ops_prod.cc
  reduction_ops_sum.cc
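Every change below follows the same mechanical pattern: preprocessor guards that previously keyed GPU-only code off GOOGLE_CUDA now also accept TENSORFLOW_USE_ROCM, so the same kernel registrations and functor instantiations get compiled for AMD GPUs under the ROCm toolchain. As a rough sketch of the resulting shape of a guarded GPU registration (the op name "SomeReduction" and the chosen reducer are illustrative placeholders, not part of this commit):

    // Sketch only: the guard pattern this commit applies throughout.
    // "SomeReduction" and SumReducer here are illustrative placeholders.
    #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM  // compiled for CUDA *or* ROCm builds
    #define REGISTER_GPU_KERNELS(type)                                  \
      REGISTER_KERNEL_BUILDER(Name("SomeReduction")                     \
                                  .Device(DEVICE_GPU)                   \
                                  .TypeConstraint<type>("T")            \
                                  .TypeConstraint<int32>("Tidx")        \
                                  .HostMemory("reduction_indices"),     \
                              ReductionOp<GPUDevice, type, int32,       \
                                          Eigen::internal::SumReducer<type>>);
    TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
    #undef REGISTER_GPU_KERNELS
    #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM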
tensorflow/core/kernels/reduction_ops.h
@@ -117,6 +117,10 @@ struct Identity {
 FIX_MEAN_IDENTITY(Eigen::half)
 FIX_MEAN_IDENTITY(float)
 FIX_MEAN_IDENTITY(double)
+#if GOOGLE_CUDA
+FIX_MEAN_IDENTITY(complex64)
+FIX_MEAN_IDENTITY(complex128)
+#endif
 #undef FIX_MEAN_IDENTITY

 template <typename Device, typename OUT_T, typename Reducer>
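For context, FIX_MEAN_IDENTITY specializes the Identity helper so that the mean of an empty reduction produces NaN instead of the reducer's default initial value; the new #if GOOGLE_CUDA block keeps the complex64/complex128 specializations CUDA-only, matching the complex kernel registrations further down, which are guarded the same way. The macro body is defined earlier in reduction_ops.h and is not part of this hunk; a hedged sketch of the kind of specialization it expands to:

    // Sketch of what a FIX_MEAN_IDENTITY(T)-style specialization generates;
    // the real macro lives earlier in reduction_ops.h and may differ in detail.
    template <>
    struct Identity<functor::MeanReducer<float>> {
      static float identity(const functor::MeanReducer<float>&) {
        // The mean of zero elements is undefined, so report NaN.
        return Eigen::NumTraits<float>::quiet_NaN();
      }
    };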
tensorflow/core/kernels/reduction_ops_all.cc
@@ -30,7 +30,7 @@ REGISTER_KERNEL_BUILDER(
         .HostMemory("reduction_indices"),
     ReductionOp<CPUDevice, bool, int64, Eigen::internal::AndReducer>);

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER_KERNEL_BUILDER(
     Name("All")
         .TypeConstraint<int32>("Tidx")
tensorflow/core/kernels/reduction_ops_any.cc
@@ -30,7 +30,7 @@ REGISTER_KERNEL_BUILDER(
         .HostMemory("reduction_indices"),
     ReductionOp<CPUDevice, bool, int64, Eigen::internal::OrReducer>);

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 REGISTER_KERNEL_BUILDER(
     Name("Any")
         .TypeConstraint<int32>("Tidx")
tensorflow/core/kernels/reduction_ops_common_gpu.h
@@ -15,8 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_GPU_H_
 #define TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_GPU_H_

-#if !GOOGLE_CUDA
-#error This file must only be included when building with Cuda support
+#if !GOOGLE_CUDA && !TENSORFLOW_USE_ROCM
+#error This file must only be included when building with GPU support
 #endif

 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
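With this gate, the header can be pulled in from either GPU build, and the #error only fires when neither macro is defined. A minimal standalone illustration of the same gate (outside TensorFlow; compile with -DGOOGLE_CUDA=1 or -DTENSORFLOW_USE_ROCM=1 to pass, with neither to hit the error):

    // Undefined macros evaluate to 0 inside #if, so a plain host build
    // (neither flag defined) trips the #error below.
    #if !GOOGLE_CUDA && !TENSORFLOW_USE_ROCM
    #error This file must only be included when building with GPU support
    #endif

    int main() { return 0; }  // reached only in a GPU-enabled configuration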
tensorflow/core/kernels/reduction_ops_euclidean.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)                          \
   REGISTER_KERNEL_BUILDER(Name("EuclideanNorm")             \
@@ -51,8 +51,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
                           ReductionOp<GPUDevice, type, int64,              \
                                       functor::EuclideanNormReducer<type>>);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
+#if GOOGLE_CUDA
 TF_CALL_complex64(REGISTER_GPU_KERNELS);
 TF_CALL_complex128(REGISTER_GPU_KERNELS);
+#endif
 #undef REGISTER_GPU_KERNELS

 #endif
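The EuclideanNorm GPU registrations, and the Mean, Prod, and Sum registrations later in this commit, all end up with the same nesting: the outer guard opens the block for any GPU build, while an inner #if GOOGLE_CUDA keeps the complex64/complex128 instantiations off the ROCm path. Schematically:

    // Guard nesting used by the EuclideanNorm/Mean/Prod/Sum registrations.
    #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM            // any GPU build
    TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);   // half/float/double types
    #if GOOGLE_CUDA                                   // complex stays CUDA-only
    TF_CALL_complex64(REGISTER_GPU_KERNELS);
    TF_CALL_complex128(REGISTER_GPU_KERNELS);
    #endif
    #undef REGISTER_GPU_KERNELS
    #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM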
tensorflow/core/kernels/reduction_ops_gpu_bool.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -59,4 +59,4 @@ DEFINE_FOR_TYPE_AND_R(bool, Eigen::internal::OrReducer);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
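The reduction_ops_gpu_*.cu.cc translation units that follow are all cut from the same cloth: each defines EIGEN_USE_GPU and instantiates the GPU reduction functors for one element type via DEFINE_FOR_TYPE_AND_R / DEFINE_FOR_ALL_REDUCERS, and the only change in each is the pair of guards at the top and bottom of the file. A sketch of the file skeleton after this commit (the macro bodies are elided because they are not shown in these hunks):

    // Skeleton of a reduction_ops_gpu_*.cu.cc file after this change.
    #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

    #define EIGEN_USE_GPU

    namespace tensorflow {
    namespace functor {

    // ... DEFINE_FOR_TYPE_AND_R / DEFINE_FOR_ALL_REDUCERS instantiations,
    // e.g. DEFINE_FOR_TYPE_AND_R(bool, Eigen::internal::OrReducer); ...

    }  // end namespace functor
    }  // end namespace tensorflow

    #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM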
tensorflow/core/kernels/reduction_ops_gpu_double.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -67,4 +67,4 @@ DEFINE_FOR_ALL_REDUCERS(double);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/kernels/reduction_ops_gpu_float.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -67,4 +67,4 @@ DEFINE_FOR_ALL_REDUCERS(float);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/kernels/reduction_ops_gpu_int.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -68,4 +68,4 @@ DEFINE_FOR_ALL_REDUCERS(int64);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/kernels/reduction_ops_half_mean_sum.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -64,4 +64,4 @@ DEFINE_FOR_ALL_REDUCERS(Eigen::half);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/kernels/reduction_ops_half_prod_max_min.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define EIGEN_USE_GPU

@@ -64,4 +64,4 @@ DEFINE_FOR_ALL_REDUCERS(Eigen::half);
 }  // end namespace functor
 }  // end namespace tensorflow

-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/kernels/reduction_ops_max.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)          \
   REGISTER_KERNEL_BUILDER(                  \
tensorflow/core/kernels/reduction_ops_mean.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)          \
   REGISTER_KERNEL_BUILDER(                  \
@@ -51,8 +51,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
           .HostMemory("reduction_indices"),                               \
       ReductionOp<GPUDevice, type, int64, functor::MeanReducer<type>>);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
+#if GOOGLE_CUDA
 TF_CALL_complex64(REGISTER_GPU_KERNELS);
 TF_CALL_complex128(REGISTER_GPU_KERNELS);
+#endif
 #undef REGISTER_GPU_KERNELS

 #endif
tensorflow/core/kernels/reduction_ops_min.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)          \
   REGISTER_KERNEL_BUILDER(                  \
tensorflow/core/kernels/reduction_ops_prod.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)                      \
   REGISTER_KERNEL_BUILDER(Name("Prod")                  \
@@ -52,8 +52,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
                                       Eigen::internal::ProdReducer<type>>);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
 TF_CALL_int32(REGISTER_GPU_KERNELS);
+#if GOOGLE_CUDA
 TF_CALL_complex64(REGISTER_GPU_KERNELS);
 TF_CALL_complex128(REGISTER_GPU_KERNELS);
+#endif
 #undef REGISTER_GPU_KERNELS

 #endif
tensorflow/core/kernels/reduction_ops_sum.cc
@@ -33,7 +33,7 @@ namespace tensorflow {
 TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS

-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

 #define REGISTER_GPU_KERNELS(type)          \
   REGISTER_KERNEL_BUILDER(                  \
@@ -52,8 +52,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
       ReductionOp<GPUDevice, type, int64, Eigen::internal::SumReducer<type>>);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
 TF_CALL_int64(REGISTER_GPU_KERNELS);
+#if GOOGLE_CUDA
 TF_CALL_complex64(REGISTER_GPU_KERNELS);
 TF_CALL_complex128(REGISTER_GPU_KERNELS);
+#endif
 #undef REGISTER_GPU_KERNELS

 // A special GPU kernel for int32.
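The trailing context line refers to the int32 variant of Sum, which is registered on DEVICE_GPU but keeps its inputs and outputs in host memory so the reduction actually runs on the CPU; that registration sits just below this hunk and is not part of the diff. A hedged sketch of what such a host-memory registration typically looks like:

    // Sketch of a host-memory int32 Sum registration on the GPU device; the
    // real registration follows this hunk in reduction_ops_sum.cc and may differ.
    REGISTER_KERNEL_BUILDER(
        Name("Sum")
            .Device(DEVICE_GPU)
            .TypeConstraint<int32>("T")
            .TypeConstraint<int32>("Tidx")
            .HostMemory("input")
            .HostMemory("output")
            .HostMemory("reduction_indices"),
        ReductionOp<CPUDevice, int32, int32, Eigen::internal::SumReducer<int32>>);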