Apply gpu-cuda forwarding host function macro

2019-05-09 21:02:24 +00:00 · 2019-05-09 21:02:24 +00:00 · aaf994daf7
commit aaf994daf7
parent a600315139
2 changed files with 12 additions and 32 deletions
--- a/tensorflow/core/util/gpu_cuda_alias.h
+++ b/tensorflow/core/util/gpu_cuda_alias.h
@ -17,14 +17,14 @@ limitations under the License.
 #define TENSORFLOW_CORE_UTIL_GPU_CUDA_ALIAS_H_

 // Several forwarding macros are defined in this file to serve for backward
-// compatibility usage as we migrating from Cuda prefixed function to Gpu
-// prefixed functions. Both Cuda and ROCm can unify under the new Gpu prefix
-// naming scheme. In the migration period, we provide equivalent Cuda* and Gpu*
-// function. Over time, all Cuda* functions will be deprecated.
+// compatibility usage as we migrating from CUDA prefixed function to GPU
+// prefixed functions. Both Cuda and ROCm can unify under the new GPU prefix
+// naming scheme. In the migration period, we provide equivalent CUDA* and GPU*
+// function. Over time, all CUDA* functions will be deprecated.

 namespace tensorflow {

-// CREATE_CUDA_HOST_FUNCTION_ALIAS forward the host function to its Cuda Alias.
+// CREATE_CUDA_HOST_FUNCTION_ALIAS forward the host function to its CUDA Alias.
 #ifndef TENSORFLOW_USE_ROCM
 #define CREATE_CUDA_HOST_FUNCTION_ALIAS(func, cuda_alias) \
  template <typename... Args>                             \
@ -36,7 +36,7 @@ namespace tensorflow {
 #define CREATE_CUDA_HOST_FUNCTION_ALIAS(func, cuda_alias)
 #endif

-// CREATE_CUDA_DEVICE_FUNCTION_ALIAS forward the device function to its Cuda
+// CREATE_CUDA_DEVICE_FUNCTION_ALIAS forward the device function to its CUDA
 // Alias.
 #ifndef TENSORFLOW_USE_ROCM
 #define CREATE_CUDA_DEVICE_FUNCTION_ALIAS(func, cuda_alias) \
@ -49,7 +49,7 @@ namespace tensorflow {
 #define CREATE_CUDA_DEVICE_FUNCTION_ALIAS(func, cuda_alias)
 #endif

-// CREATE_CUDA_TYPE_ALIAS forward the type to its Cuda Alias.
+// CREATE_CUDA_TYPE_ALIAS forward the type to its CUDA Alias.
 #ifndef TENSORFLOW_USE_ROCM
 #define CREATE_CUDA_TYPE_ALIAS(type, cuda_alias) using cuda_alias = type;
 #else
--- a/tensorflow/core/util/gpu_launch_config.h
+++ b/tensorflow/core/util/gpu_launch_config.h
@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/gpu_cuda_alias.h"

 // Usage of GetGpuLaunchConfig, GetGpu2DLaunchConfig, and
 // GetGpu3DLaunchConfig:
@ -192,14 +193,7 @@ GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
  config.block_count = block_count;
  return config;
 }
-template <typename DeviceFunc>
-CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
-                                     const Eigen::GpuDevice& d, DeviceFunc func,
-                                     size_t dynamic_shared_memory_size,
-                                     int block_size_limit) {
-  return GetGpuLaunchConfig(work_element_count, d, func,
-                            dynamic_shared_memory_size, block_size_limit);
-}
+CREATE_CUDA_HOST_FUNCTION_ALIAS(GetGpuLaunchConfig, GetCudaLaunchConfig);

 // Calculate the Cuda launch config we should use for a kernel launch. This
 // variant takes the resource limits of func into account to maximize occupancy.
@ -244,14 +238,7 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize(
  config.block_count = block_count;
  return config;
 }
-template <typename DeviceFunc>
-CudaLaunchConfig GetCudaLaunchConfigFixedBlockSize(
-    int work_element_count, const Eigen::GpuDevice& d, DeviceFunc func,
-    size_t dynamic_shared_memory_size, int fixed_block_size) {
-  return GetGpuLaunchConfigFixedBlockSize(work_element_count, d, func,
-                                          dynamic_shared_memory_size,
-                                          fixed_block_size);
-}
+CREATE_CUDA_HOST_FUNCTION_ALIAS(GetGpuLaunchConfigFixedBlockSize, GetCudaLaunchConfigFixedBlockSize);

 struct Gpu2DLaunchConfig {
  dim3 virtual_thread_count = dim3(0, 0, 0);
@ -368,15 +355,7 @@ Cuda3DLaunchConfig GetGpu3DLaunchConfig(int xdim, int ydim, int zdim,
  config.block_count = dim3(blocksx, blocksy, blocksz);
  return config;
 }
-template <typename DeviceFunc>
-Cuda3DLaunchConfig GetCuda3DLaunchConfig(int xdim, int ydim, int zdim,
-                                         const Eigen::GpuDevice& d,
-                                         DeviceFunc func,
-                                         size_t dynamic_shared_memory_size,
-                                         int block_size_limit) {
-  return GetGpu3DLaunchConfig(xdim, ydim, zdim, d, func,
-                              dynamic_shared_memory_size, block_size_limit);
-}
+CREATE_CUDA_HOST_FUNCTION_ALIAS(GetGpu3DLaunchConfig, GetCuda3DLaunchConfig);

 template <typename DeviceFunc>
 Gpu2DLaunchConfig GetGpu2DLaunchConfig(int xdim, int ydim,
@ -387,6 +366,7 @@ Gpu2DLaunchConfig GetGpu2DLaunchConfig(int xdim, int ydim,
  return GetGpu3DLaunchConfig(xdim, ydim, 1, d, func,
                              dynamic_shared_memory_size, block_size_limit);
 }
+CREATE_CUDA_HOST_FUNCTION_ALIAS(GetGpu2DLaunchConfig, GetCuda2DLaunchConfig);

 #if GOOGLE_CUDA
 // Returns a raw reference to the current cuda stream.  Required by a