From 8986d4eb87c281663377909ce16327cceadcc377 Mon Sep 17 00:00:00 2001
From: "Wen-Heng (Jack) Chung" <whchung@gmail.com>
Date: Tue, 13 Aug 2019 16:31:25 +0000
Subject: [PATCH] [ROCm] enable InTopK op on ROCm.

---
 tensorflow/core/kernels/in_topk_op.cc        | 4 ++--
 tensorflow/core/kernels/in_topk_op.h         | 4 ++--
 tensorflow/core/kernels/in_topk_op_gpu.cu.cc | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/kernels/in_topk_op.cc b/tensorflow/core/kernels/in_topk_op.cc
index a8ee00e080e..22d833395f0 100644
--- a/tensorflow/core/kernels/in_topk_op.cc
+++ b/tensorflow/core/kernels/in_topk_op.cc
@@ -116,7 +116,7 @@ REGISTER_KERNEL_BUILDER(Name("InTopKV2")
                             .TypeConstraint<int64>("T"),
                         InTopK<CPUDevice, float, int64>);
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 // Forward declarations of the functor specializations for GPU.
 namespace functor {
@@ -142,6 +142,6 @@ REGISTER_KERNEL_BUILDER(
     Name("InTopKV2").Device(DEVICE_GPU).TypeConstraint<int64>("T"),
     InTopK<GPUDevice, float, int64>);
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/in_topk_op.h b/tensorflow/core/kernels/in_topk_op.h
index 52716f2d272..f48932cdbbf 100644
--- a/tensorflow/core/kernels/in_topk_op.h
+++ b/tensorflow/core/kernels/in_topk_op.h
@@ -16,9 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_IN_TOPK_OP_H_
 #define TENSORFLOW_CORE_KERNELS_IN_TOPK_OP_H_
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/bounds_check.h"
diff --git a/tensorflow/core/kernels/in_topk_op_gpu.cu.cc b/tensorflow/core/kernels/in_topk_op_gpu.cu.cc
index 1894dedddf1..4c59e1f7bed 100644
--- a/tensorflow/core/kernels/in_topk_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/in_topk_op_gpu.cu.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA)
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || TENSORFLOW_USE_ROCM
 
 #define EIGEN_USE_GPU
 
@@ -41,7 +41,7 @@ __global__ void ComputePredictionMaskKernel(
     const TargetT* targets,  // dims: [ num_targets ]
     int64* mask,             // dims: [ num_targets x num_classes ]
     int num_targets, int num_classes) {
-  CUDA_1D_KERNEL_LOOP(i, num_targets * num_classes) {
+  GPU_1D_KERNEL_LOOP(i, num_targets * num_classes) {
     const int batch_index = i / num_classes;
     TargetT target_idx = ldg(targets + batch_index);
 
@@ -118,7 +118,7 @@ struct InTopKFunctor<GPUDevice, T, TargetT> {
     const auto& d = context->eigen_device<GPUDevice>();
 
     // Compute a mask for all predictions.
-    CudaLaunchConfig config = GetGpuLaunchConfig(num_targets * num_classes, d);
+    GpuLaunchConfig config = GetGpuLaunchConfig(num_targets * num_classes, d);
     OP_REQUIRES_OK(
         context, GpuLaunchKernel(ComputePredictionMaskKernel<T, TargetT>,
                                  config.block_count, config.thread_per_block, 0,
@@ -173,4 +173,4 @@ DEFINE_GPU_KERNELS(float, int64);
 
 }  // end namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
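
Note (not part of the patch): the change follows TensorFlow's usual CUDA/ROCm portability idiom. The #if GOOGLE_CUDA guards are widened to GOOGLE_CUDA || TENSORFLOW_USE_ROCM, and the CUDA-prefixed helpers (CUDA_1D_KERNEL_LOOP, CudaLaunchConfig) are replaced by their platform-neutral counterparts (GPU_1D_KERNEL_LOOP, GpuLaunchConfig), so the same kernel source builds under both nvcc and hipcc. Below is a minimal sketch of how a .cu.cc file written in that style looks, assuming the helpers from tensorflow/core/util/gpu_kernel_helper.h; ScaleKernel and ScaleFunctor are hypothetical names used only for illustration, not code from this patch.

// Sketch only: illustrates the portable-GPU idiom used by the patch above.
// ScaleKernel and ScaleFunctor are hypothetical, not part of TensorFlow.
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#define EIGEN_USE_GPU

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/util/gpu_kernel_helper.h"

namespace tensorflow {

typedef Eigen::GpuDevice GPUDevice;

// GPU_1D_KERNEL_LOOP expands to a grid-stride loop that compiles under both
// nvcc (CUDA) and hipcc (ROCm).
__global__ void ScaleKernel(const float* in, float* out, float alpha, int n) {
  GPU_1D_KERNEL_LOOP(i, n) { out[i] = alpha * ldg(in + i); }
}

struct ScaleFunctor {
  void operator()(OpKernelContext* context, const float* in, float* out,
                  float alpha, int n) {
    const GPUDevice& d = context->eigen_device<GPUDevice>();
    // GetGpuLaunchConfig picks a grid/block size for whichever back end is
    // active; GpuLaunchKernel hides the <<<...>>> vs. hipLaunchKernelGGL
    // launch-syntax difference and returns a Status.
    GpuLaunchConfig config = GetGpuLaunchConfig(n, d);
    OP_REQUIRES_OK(context,
                   GpuLaunchKernel(ScaleKernel, config.block_count,
                                   config.thread_per_block, 0, d.stream(), in,
                                   out, alpha, n));
  }
};

}  // namespace tensorflow

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

Because the loop macro, launch-config helper, and launch wrapper are all platform-neutral, no per-backend #ifdefs are needed inside the kernel body itself, which is what lets the InTopK kernel above be shared verbatim between the CUDA and ROCm builds.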