From e0c05bb427e800f83d651b8b75676b1e17f370cb Mon Sep 17 00:00:00 2001
From: Deven Desai <deven.desai.amd@gmail.com>
Date: Sun, 19 May 2019 02:28:04 +0000
Subject: [PATCH 1/2] [ROCm] Fix for the broken `--config=rocm` build.

This PR contains a fix for the broken `--config=rocm` build.

Currently the file `tensorflow/core/util/gpuLaunch_config.h` is not included in any code that is enabled for the `--config=rocm` build. Once that is included within ROCm enabled code, it will break the `--config=rocm` build, because that file currrently contains a couple of Cuda* names that should not be visible in the ROCm build. This commit/PR fixes that.
---
 tensorflow/core/util/gpu_launch_config.h | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/tensorflow/core/util/gpu_launch_config.h b/tensorflow/core/util/gpu_launch_config.h
index 565fff8ed47..bcc139ab59d 100644
--- a/tensorflow/core/util/gpu_launch_config.h
+++ b/tensorflow/core/util/gpu_launch_config.h
@@ -142,10 +142,12 @@ inline GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
   config.block_count = block_count;
   return config;
 }
+#ifndef TENSORFLOW_USE_ROCM
 inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
                                             const Eigen::GpuDevice& d) {
   return GetGpuLaunchConfig(work_element_count, d);
 }
+#endif
 
 // Calculate the GPU launch config we should use for a kernel launch. This
 // variant takes the resource limits of func into account to maximize occupancy.
@@ -275,10 +277,12 @@ inline Gpu2DLaunchConfig GetGpu2DLaunchConfig(int xdim, int ydim,
       grid_x, std::min(max_blocks / grid_x, std::max(ydim / block_rows, 1)), 1);
   return config;
 }
+#ifndef TENSORFLOW_USE_ROCM
 inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim,
                                                 const Eigen::GpuDevice& d) {
   return GetGpu2DLaunchConfig(xdim, ydim, d);
 }
+#endif
 
 // Calculate the GPU 2D and 3D launch config we should use for a kernel launch.
 // This variant takes the resource limits of func into account to maximize

From 3e9beeb2f8dc621ca5951e1ecc99c4b25b638b3c Mon Sep 17 00:00:00 2001
From: Deven Desai <deven.desai.amd@gmail.com>
Date: Sun, 19 May 2019 18:48:47 +0000
Subject: [PATCH 2/2] renaming namespace cuda_helper to gpu_helper.

Also adding a `cuda_helper` as an alias for `gpu_helper` in non ROCm mode (for backwards compatibility)
---
 tensorflow/core/util/gpu_kernel_helper.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/util/gpu_kernel_helper.h b/tensorflow/core/util/gpu_kernel_helper.h
index 133092540b7..abf0faa3f20 100644
--- a/tensorflow/core/util/gpu_kernel_helper.h
+++ b/tensorflow/core/util/gpu_kernel_helper.h
@@ -160,7 +160,7 @@ __device__ EIGEN_ALWAYS_INLINE Eigen::half GpuShuffleXorSync(
 // Aliased in gpu_device_functions.h
 #endif
 
-namespace cuda_helper {
+namespace gpu_helper {
 template <typename T, typename OutType = int32>
 __device__ OutType upper_bound(const T* first, OutType count, T val) {
   const T* orig = first;
@@ -202,6 +202,11 @@ __device__ OutType lower_bound(const T* first, OutType count, T val) {
 }
 
 }  // namespace cuda_helper
+
+#ifndef TENSORFLOW_USE_ROCM
+  namespace cuda_helper = gpu_helper;
+#endif
+
 }  // namespace tensorflow
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM