Apply gpu-cuda forwarding host function macro
This commit is contained in:
parent
a600315139
commit
aaf994daf7
@ -17,14 +17,14 @@ limitations under the License.
|
||||
#define TENSORFLOW_CORE_UTIL_GPU_CUDA_ALIAS_H_
|
||||
|
||||
// Several forwarding macros are defined in this file to serve for backward
|
||||
// compatibility usage as we migrating from Cuda prefixed function to Gpu
|
||||
// prefixed functions. Both Cuda and ROCm can unify under the new Gpu prefix
|
||||
// naming scheme. In the migration period, we provide equivalent Cuda* and Gpu*
|
||||
// function. Over time, all Cuda* functions will be deprecated.
|
||||
// compatibility usage as we migrate from CUDA prefixed functions to GPU
|
||||
// prefixed functions. Both Cuda and ROCm can unify under the new GPU prefix
|
||||
// naming scheme. In the migration period, we provide equivalent CUDA* and GPU*
|
||||
// functions. Over time, all CUDA* functions will be deprecated.
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// CREATE_CUDA_HOST_FUNCTION_ALIAS forward the host function to its Cuda Alias.
|
||||
// CREATE_CUDA_HOST_FUNCTION_ALIAS forwards the host function to its CUDA Alias.
|
||||
#ifndef TENSORFLOW_USE_ROCM
|
||||
#define CREATE_CUDA_HOST_FUNCTION_ALIAS(func, cuda_alias) \
|
||||
template <typename... Args> \
|
||||
@ -36,7 +36,7 @@ namespace tensorflow {
|
||||
#define CREATE_CUDA_HOST_FUNCTION_ALIAS(func, cuda_alias)
|
||||
#endif
|
||||
|
||||
// CREATE_CUDA_DEVICE_FUNCTION_ALIAS forward the device function to its Cuda
|
||||
// CREATE_CUDA_DEVICE_FUNCTION_ALIAS forwards the device function to its CUDA
|
||||
// Alias.
|
||||
#ifndef TENSORFLOW_USE_ROCM
|
||||
#define CREATE_CUDA_DEVICE_FUNCTION_ALIAS(func, cuda_alias) \
|
||||
@ -49,7 +49,7 @@ namespace tensorflow {
|
||||
#define CREATE_CUDA_DEVICE_FUNCTION_ALIAS(func, cuda_alias)
|
||||
#endif
|
||||
|
||||
// CREATE_CUDA_TYPE_ALIAS forward the type to its Cuda Alias.
|
||||
// CREATE_CUDA_TYPE_ALIAS forwards the type to its CUDA Alias.
|
||||
#ifndef TENSORFLOW_USE_ROCM
|
||||
#define CREATE_CUDA_TYPE_ALIAS(type, cuda_alias) using cuda_alias = type;
|
||||
#else
|
||||
|
@ -26,6 +26,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/stream_executor.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/core/util/gpu_cuda_alias.h"
|
||||
|
||||
// Usage of GetGpuLaunchConfig, GetGpu2DLaunchConfig, and
|
||||
// GetGpu3DLaunchConfig:
|
||||
@ -192,14 +193,7 @@ GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
|
||||
config.block_count = block_count;
|
||||
return config;
|
||||
}
|
||||
template <typename DeviceFunc>
|
||||
CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
|
||||
const Eigen::GpuDevice& d, DeviceFunc func,
|
||||
size_t dynamic_shared_memory_size,
|
||||
int block_size_limit) {
|
||||
return GetGpuLaunchConfig(work_element_count, d, func,
|
||||
dynamic_shared_memory_size, block_size_limit);
|
||||
}
|
||||
CREATE_CUDA_HOST_FUNCTION_ALIAS(GetGpuLaunchConfig, GetCudaLaunchConfig);
|
||||
|
||||
// Calculate the Cuda launch config we should use for a kernel launch. This
|
||||
// variant takes the resource limits of func into account to maximize occupancy.
|
||||
@ -244,14 +238,7 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize(
|
||||
config.block_count = block_count;
|
||||
return config;
|
||||
}
|
||||
template <typename DeviceFunc>
|
||||
CudaLaunchConfig GetCudaLaunchConfigFixedBlockSize(
|
||||
int work_element_count, const Eigen::GpuDevice& d, DeviceFunc func,
|
||||
size_t dynamic_shared_memory_size, int fixed_block_size) {
|
||||
return GetGpuLaunchConfigFixedBlockSize(work_element_count, d, func,
|
||||
dynamic_shared_memory_size,
|
||||
fixed_block_size);
|
||||
}
|
||||
CREATE_CUDA_HOST_FUNCTION_ALIAS(GetGpuLaunchConfigFixedBlockSize, GetCudaLaunchConfigFixedBlockSize);
|
||||
|
||||
struct Gpu2DLaunchConfig {
|
||||
dim3 virtual_thread_count = dim3(0, 0, 0);
|
||||
@ -368,15 +355,7 @@ Cuda3DLaunchConfig GetGpu3DLaunchConfig(int xdim, int ydim, int zdim,
|
||||
config.block_count = dim3(blocksx, blocksy, blocksz);
|
||||
return config;
|
||||
}
|
||||
template <typename DeviceFunc>
|
||||
Cuda3DLaunchConfig GetCuda3DLaunchConfig(int xdim, int ydim, int zdim,
|
||||
const Eigen::GpuDevice& d,
|
||||
DeviceFunc func,
|
||||
size_t dynamic_shared_memory_size,
|
||||
int block_size_limit) {
|
||||
return GetGpu3DLaunchConfig(xdim, ydim, zdim, d, func,
|
||||
dynamic_shared_memory_size, block_size_limit);
|
||||
}
|
||||
CREATE_CUDA_HOST_FUNCTION_ALIAS(GetGpu3DLaunchConfig, GetCuda3DLaunchConfig);
|
||||
|
||||
template <typename DeviceFunc>
|
||||
Gpu2DLaunchConfig GetGpu2DLaunchConfig(int xdim, int ydim,
|
||||
@ -387,6 +366,7 @@ Gpu2DLaunchConfig GetGpu2DLaunchConfig(int xdim, int ydim,
|
||||
return GetGpu3DLaunchConfig(xdim, ydim, 1, d, func,
|
||||
dynamic_shared_memory_size, block_size_limit);
|
||||
}
|
||||
CREATE_CUDA_HOST_FUNCTION_ALIAS(GetGpu2DLaunchConfig, GetCuda2DLaunchConfig);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Returns a raw reference to the current cuda stream. Required by a
|
||||
|
Loading…
x
Reference in New Issue
Block a user