Merge pull request #28834 from ROCmSoftwarePlatform:google_upstream_rocm_platform_fix_190518
PiperOrigin-RevId: 249807581
This commit is contained in:
commit
bb6eee97e0
@ -160,7 +160,7 @@ __device__ EIGEN_ALWAYS_INLINE Eigen::half GpuShuffleXorSync(
|
||||
// Aliased in gpu_device_functions.h
|
||||
#endif
|
||||
|
||||
namespace cuda_helper {
|
||||
namespace gpu_helper {
|
||||
template <typename T, typename OutType = int32>
|
||||
__device__ OutType upper_bound(const T* first, OutType count, T val) {
|
||||
const T* orig = first;
|
||||
@ -201,7 +201,12 @@ __device__ OutType lower_bound(const T* first, OutType count, T val) {
|
||||
return first - orig;
|
||||
}
|
||||
|
||||
} // namespace cuda_helper
|
||||
} // namespace gpu_helper
|
||||
|
||||
// When TENSORFLOW_USE_ROCM is not defined, expose the name cuda_helper as a
// namespace alias for gpu_helper.
#ifndef TENSORFLOW_USE_ROCM
|
||||
namespace cuda_helper = gpu_helper;
|
||||
#endif
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
||||
|
@ -142,10 +142,12 @@ inline GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
|
||||
config.block_count = block_count;
|
||||
return config;
|
||||
}
|
||||
#ifndef TENSORFLOW_USE_ROCM
|
||||
// CUDA-named wrapper, compiled only when TENSORFLOW_USE_ROCM is not defined.
// Passes work_element_count and d unchanged to GetGpuLaunchConfig and returns
// its result.
// NOTE(review): presumably CudaLaunchConfig is an alias of (or convertible
// from) GpuLaunchConfig -- confirm against its declaration.
inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
|
||||
const Eigen::GpuDevice& d) {
|
||||
return GetGpuLaunchConfig(work_element_count, d);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Calculate the GPU launch config we should use for a kernel launch. This
|
||||
// variant takes the resource limits of func into account to maximize occupancy.
|
||||
@ -275,10 +277,12 @@ inline Gpu2DLaunchConfig GetGpu2DLaunchConfig(int xdim, int ydim,
|
||||
grid_x, std::min(max_blocks / grid_x, std::max(ydim / block_rows, 1)), 1);
|
||||
return config;
|
||||
}
|
||||
#ifndef TENSORFLOW_USE_ROCM
|
||||
// CUDA-named wrapper, compiled only when TENSORFLOW_USE_ROCM is not defined.
// Passes xdim, ydim and d unchanged to GetGpu2DLaunchConfig and returns its
// result.
// NOTE(review): presumably Cuda2DLaunchConfig is an alias of (or convertible
// from) Gpu2DLaunchConfig -- confirm against its declaration.
inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim,
|
||||
const Eigen::GpuDevice& d) {
|
||||
return GetGpu2DLaunchConfig(xdim, ydim, d);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Calculate the GPU 2D and 3D launch config we should use for a kernel launch.
|
||||
// This variant takes the resource limits of func into account to maximize
|
||||
|
Loading…
Reference in New Issue
Block a user