Merge pull request #28834 from ROCmSoftwarePlatform:google_upstream_rocm_platform_fix_190518

PiperOrigin-RevId: 249807581
TensorFlower Gardener committed 2019-05-24 03:59:26 -07:00
commit bb6eee97e0
2 changed files with 11 additions and 2 deletions


@@ -160,7 +160,7 @@ __device__ EIGEN_ALWAYS_INLINE Eigen::half GpuShuffleXorSync(
 // Aliased in gpu_device_functions.h
 #endif
 
-namespace cuda_helper {
+namespace gpu_helper {
 template <typename T, typename OutType = int32>
 __device__ OutType upper_bound(const T* first, OutType count, T val) {
   const T* orig = first;
@@ -201,7 +201,12 @@ __device__ OutType lower_bound(const T* first, OutType count, T val) {
   return first - orig;
 }
 
-}  // namespace cuda_helper
+}  // namespace gpu_helper
+
+#ifndef TENSORFLOW_USE_ROCM
+namespace cuda_helper = gpu_helper;
+#endif
+
 }  // namespace tensorflow
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
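
Note: the rename keeps CUDA-only callers working because, on non-ROCm builds, the alias makes cuda_helper:: resolve to gpu_helper::, while ROCm builds only see the new name. A minimal sketch of a device-side caller follows, assuming the helpers live in tensorflow/core/util/gpu_kernel_helper.h; the kernel and variable names are illustrative and not part of this commit.

// Illustrative device code, not from this commit.
#include "tensorflow/core/util/gpu_kernel_helper.h"

// Counts how many entries of a sorted array are <= key using the
// binary-search helper that now lives in gpu_helper.
__global__ void CountLessEqual(const float* sorted_vals, int n, float key,
                               int* out_count) {
  if (blockIdx.x == 0 && threadIdx.x == 0) {
    // On CUDA builds, tensorflow::cuda_helper::upper_bound also compiles,
    // via the alias `namespace cuda_helper = gpu_helper;` added above.
    *out_count =
        tensorflow::gpu_helper::upper_bound<float>(sorted_vals, n, key);
  }
}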


@@ -142,10 +142,12 @@ inline GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
   config.block_count = block_count;
   return config;
 }
+#ifndef TENSORFLOW_USE_ROCM
 inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
                                             const Eigen::GpuDevice& d) {
   return GetGpuLaunchConfig(work_element_count, d);
 }
+#endif
 
 // Calculate the GPU launch config we should use for a kernel launch. This
 // variant takes the resource limits of func into account to maximize occupancy.
@@ -275,10 +277,12 @@ inline Gpu2DLaunchConfig GetGpu2DLaunchConfig(int xdim, int ydim,
       grid_x, std::min(max_blocks / grid_x, std::max(ydim / block_rows, 1)), 1);
   return config;
 }
+#ifndef TENSORFLOW_USE_ROCM
 inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim,
                                                 const Eigen::GpuDevice& d) {
   return GetGpu2DLaunchConfig(xdim, ydim, d);
 }
+#endif
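
Note: with the GetCudaLaunchConfig / GetCuda2DLaunchConfig wrappers compiled out under TENSORFLOW_USE_ROCM, kernels that must build on both platforms should call the Gpu-prefixed entry points directly. A rough sketch of such a launch site follows, assuming the GpuLaunchConfig, GetGpuLaunchConfig, GPU_1D_KERNEL_LOOP, and GpuLaunchKernel helpers from tensorflow/core/util/gpu_kernel_helper.h; ScaleKernel and LaunchScale are made-up names used only for illustration.

// Illustrative host/device code, not from this commit.
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/util/gpu_kernel_helper.h"

namespace tensorflow {

// Made-up elementwise kernel, used only to show the launch pattern.
__global__ void ScaleKernel(int n, const float* in, float scale, float* out) {
  GPU_1D_KERNEL_LOOP(i, n) { out[i] = in[i] * scale; }
}

void LaunchScale(const Eigen::GpuDevice& d, int n, const float* in,
                 float scale, float* out) {
  // GetGpuLaunchConfig is available on both CUDA and ROCm builds; the old
  // GetCudaLaunchConfig wrapper is now CUDA-only.
  GpuLaunchConfig cfg = GetGpuLaunchConfig(n, d);
  TF_CHECK_OK(GpuLaunchKernel(ScaleKernel, cfg.block_count,
                              cfg.thread_per_block,
                              /*shared_memory_size_bytes=*/0, d.stream(), n,
                              in, scale, out));
}

}  // namespace tensorflow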
 
 // Calculate the GPU 2D and 3D launch config we should use for a kernel launch.
 // This variant takes the resource limits of func into account to maximize