From e0c05bb427e800f83d651b8b75676b1e17f370cb Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Sun, 19 May 2019 02:28:04 +0000 Subject: [PATCH 1/2] [ROCm] Fix for the broken `--config=rocm` build. This PR contains a fix for the broken `--config=rocm` build. Currently the file `tensorflow/core/util/gpu_launch_config.h` is not included in any code that is enabled for the `--config=rocm` build. Once it is included within ROCm-enabled code, it will break the `--config=rocm` build, because that file currently contains a couple of Cuda* names that should not be visible in the ROCm build. This commit/PR fixes that. --- tensorflow/core/util/gpu_launch_config.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/util/gpu_launch_config.h b/tensorflow/core/util/gpu_launch_config.h index 565fff8ed47..bcc139ab59d 100644 --- a/tensorflow/core/util/gpu_launch_config.h +++ b/tensorflow/core/util/gpu_launch_config.h @@ -142,10 +142,12 @@ inline GpuLaunchConfig GetGpuLaunchConfig(int work_element_count, config.block_count = block_count; return config; } +#ifndef TENSORFLOW_USE_ROCM inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count, const Eigen::GpuDevice& d) { return GetGpuLaunchConfig(work_element_count, d); } +#endif // Calculate the GPU launch config we should use for a kernel launch. This // variant takes the resource limits of func into account to maximize occupancy. @@ -275,10 +277,12 @@ inline Gpu2DLaunchConfig GetGpu2DLaunchConfig(int xdim, int ydim, grid_x, std::min(max_blocks / grid_x, std::max(ydim / block_rows, 1)), 1); return config; } +#ifndef TENSORFLOW_USE_ROCM inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim, const Eigen::GpuDevice& d) { return GetGpu2DLaunchConfig(xdim, ydim, d); } +#endif // Calculate the GPU 2D and 3D launch config we should use for a kernel launch. 
// This variant takes the resource limits of func into account to maximize From 3e9beeb2f8dc621ca5951e1ecc99c4b25b638b3c Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Sun, 19 May 2019 18:48:47 +0000 Subject: [PATCH 2/2] renaming namespace cuda_helper to gpu_helper. Also adding `cuda_helper` as an alias for `gpu_helper` in non-ROCm mode (for backwards compatibility) --- tensorflow/core/util/gpu_kernel_helper.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/util/gpu_kernel_helper.h b/tensorflow/core/util/gpu_kernel_helper.h index 133092540b7..abf0faa3f20 100644 --- a/tensorflow/core/util/gpu_kernel_helper.h +++ b/tensorflow/core/util/gpu_kernel_helper.h @@ -160,7 +160,7 @@ __device__ EIGEN_ALWAYS_INLINE Eigen::half GpuShuffleXorSync( // Aliased in gpu_device_functions.h #endif -namespace cuda_helper { +namespace gpu_helper { template __device__ OutType upper_bound(const T* first, OutType count, T val) { const T* orig = first; @@ -202,6 +202,11 @@ __device__ OutType lower_bound(const T* first, OutType count, T val) { } } // namespace cuda_helper + +#ifndef TENSORFLOW_USE_ROCM + namespace cuda_helper = gpu_helper; +#endif + } // namespace tensorflow #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM