Merge pull request #28834 from ROCmSoftwarePlatform:google_upstream_rocm_platform_fix_190518
PiperOrigin-RevId: 249807581
This commit is contained in:
commit
bb6eee97e0
@ -160,7 +160,7 @@ __device__ EIGEN_ALWAYS_INLINE Eigen::half GpuShuffleXorSync(
|
|||||||
// Aliased in gpu_device_functions.h
|
// Aliased in gpu_device_functions.h
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace cuda_helper {
|
namespace gpu_helper {
|
||||||
template <typename T, typename OutType = int32>
|
template <typename T, typename OutType = int32>
|
||||||
__device__ OutType upper_bound(const T* first, OutType count, T val) {
|
__device__ OutType upper_bound(const T* first, OutType count, T val) {
|
||||||
const T* orig = first;
|
const T* orig = first;
|
||||||
@ -201,7 +201,12 @@ __device__ OutType lower_bound(const T* first, OutType count, T val) {
|
|||||||
return first - orig;
|
return first - orig;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace cuda_helper
|
} // namespace gpu_helper
|
||||||
|
|
||||||
|
#ifndef TENSORFLOW_USE_ROCM
|
||||||
|
namespace cuda_helper = gpu_helper;
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
||||||
|
@ -142,10 +142,12 @@ inline GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
|
|||||||
config.block_count = block_count;
|
config.block_count = block_count;
|
||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
#ifndef TENSORFLOW_USE_ROCM
|
||||||
inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
|
inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
|
||||||
const Eigen::GpuDevice& d) {
|
const Eigen::GpuDevice& d) {
|
||||||
return GetGpuLaunchConfig(work_element_count, d);
|
return GetGpuLaunchConfig(work_element_count, d);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Calculate the GPU launch config we should use for a kernel launch. This
|
// Calculate the GPU launch config we should use for a kernel launch. This
|
||||||
// variant takes the resource limits of func into account to maximize occupancy.
|
// variant takes the resource limits of func into account to maximize occupancy.
|
||||||
@ -275,10 +277,12 @@ inline Gpu2DLaunchConfig GetGpu2DLaunchConfig(int xdim, int ydim,
|
|||||||
grid_x, std::min(max_blocks / grid_x, std::max(ydim / block_rows, 1)), 1);
|
grid_x, std::min(max_blocks / grid_x, std::max(ydim / block_rows, 1)), 1);
|
||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
#ifndef TENSORFLOW_USE_ROCM
|
||||||
inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim,
|
inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim,
|
||||||
const Eigen::GpuDevice& d) {
|
const Eigen::GpuDevice& d) {
|
||||||
return GetGpu2DLaunchConfig(xdim, ydim, d);
|
return GetGpu2DLaunchConfig(xdim, ydim, d);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Calculate the GPU 2D and 3D launch config we should use for a kernel launch.
|
// Calculate the GPU 2D and 3D launch config we should use for a kernel launch.
|
||||||
// This variant takes the resource limits of func into account to maximize
|
// This variant takes the resource limits of func into account to maximize
|
||||||
|
Loading…
Reference in New Issue
Block a user