diff --git a/tensorflow/core/kernels/population_count_op.cc b/tensorflow/core/kernels/population_count_op.cc
index 12ff6b69f87..c08b1146439 100644
--- a/tensorflow/core/kernels/population_count_op.cc
+++ b/tensorflow/core/kernels/population_count_op.cc
@@ -122,7 +122,7 @@ struct PopulationCount<CPUDevice, T> {
 
 }  // namespace functor
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #define REGISTER_POPULATION_COUNT(type) \
   REGISTER_KERNEL_BUILDER(              \
@@ -158,6 +158,6 @@ TF_CALL_int64(DECLARE_GPU_SPEC);
 
 }  // namespace functor
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/population_count_op_gpu.cu.cc b/tensorflow/core/kernels/population_count_op_gpu.cu.cc
index 22beadfe61a..a7d84b8f42d 100644
--- a/tensorflow/core/kernels/population_count_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/population_count_op_gpu.cu.cc
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #define EIGEN_USE_GPU
 
@@ -35,14 +35,14 @@ namespace functor {
 template <typename T>
 __global__ void PopulationCountKernel(const int size, const T* input,
                                       uint8* output) {
-  CUDA_1D_KERNEL_LOOP(i, size) { output[i] = __popc(ldg(input + i)); }
+  GPU_1D_KERNEL_LOOP(i, size) { output[i] = __popc(ldg(input + i)); }
 }
 
 template <>
 __global__ void PopulationCountKernel(const int size, const int8* input,
                                       uint8* output) {
   // For some reason, __popc on a negative int8 gets confused.
-  CUDA_1D_KERNEL_LOOP(i, size) {
+  GPU_1D_KERNEL_LOOP(i, size) {
     output[i] = __popc(ldg(reinterpret_cast<const uint8*>(input + i)));
   }
 }
@@ -51,7 +51,7 @@ template <>
 __global__ void PopulationCountKernel(const int size, const int16* input,
                                       uint8* output) {
   // For some reason, __popc on a negative int16 gets confused.
-  CUDA_1D_KERNEL_LOOP(i, size) {
+  GPU_1D_KERNEL_LOOP(i, size) {
     output[i] = __popc(ldg(reinterpret_cast<const uint16*>(input + i)));
   }
 }
@@ -59,7 +59,7 @@ __global__ void PopulationCountKernel(const int size, const int16* input,
 template <>
 __global__ void PopulationCountKernel(const int size, const int64* input,
                                       uint8* output) {
-  CUDA_1D_KERNEL_LOOP(i, size) { output[i] = __popcll(ldg(input + i)); }
+  GPU_1D_KERNEL_LOOP(i, size) { output[i] = __popcll(ldg(input + i)); }
 }
 
 #define DEFINE_GPU_SPECS(T)                                                   \
@@ -69,8 +69,8 @@ __global__ void PopulationCountKernel(const int size, const int64* input,
       TTypes<uint8>::Flat output) {                                           \
     const GPUDevice& d = c->eigen_device<GPUDevice>();                        \
     int64 total_count = input.size();                                         \
-    GpuLaunchConfig config = GetCudaLaunchConfig(total_count, d);             \
-    TF_CHECK_OK(CudaLaunchKernel(PopulationCountKernel<T>, config.block_count, \
+    GpuLaunchConfig config = GetGpuLaunchConfig(total_count, d);              \
+    TF_CHECK_OK(GpuLaunchKernel(PopulationCountKernel<T>, config.block_count, \
                                  config.thread_per_block, 0, d.stream(),      \
                                  total_count, input.data(), output.data()));  \
   }
@@ -88,4 +88,4 @@ TF_CALL_int64(DEFINE_GPU_SPECS);
 
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM