Merge pull request #35401 from ROCmSoftwarePlatform:google_upstream_rocm_documentation
PiperOrigin-RevId: 288929060
Change-Id: I9179465fb654df9130bb51c54fd9a35a3cd8f0c1
commit 33963418c6
@@ -168,23 +168,18 @@ GpuLaunchConfig GetGpuLaunchConfig(int work_element_count,
       block_size_limit);
   CHECK_EQ(err, cudaSuccess);
 #elif TENSORFLOW_USE_ROCM
-  // ROCM TODO re-enable this after hipOccupancyMaxPotentialBlockSize is
-  // implemented
-  // hipError_t err = hipOccupancyMaxPotentialBlockSize(
-  //     &block_count, &thread_per_block, func, dynamic_shared_memory_size,
-  //     block_size_limit);
-  // CHECK_EQ(err, hipSuccess);
-
-  // Apply the heuristic in GetGpuLaunchConfig(int, const Eigen::GpuDevice&)
-  // that the kernel is quite simple and will largely be memory-limited.
-  const int physical_thread_count = std::min(
-      d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor(),
-      work_element_count);
-  // Assume the kernel be simple enough that it is okay to use 1024 threads
-  // per workgroup.
-  thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock());
-  block_count = std::min(DivUp(physical_thread_count, thread_per_block),
-                         d.getNumGpuMultiProcessors());
+  // Earlier versions of this HIP routine incorrectly returned void.
+  // TODO re-enable hipError_t error checking when HIP is fixed.
+  // ROCm interface uses unsigned int, convert after checking
+  uint32_t block_count_uint = 0;
+  uint32_t thread_per_block_uint = 0;
+  CHECK_GE(block_size_limit, 0);
+  uint32_t block_size_limit_uint = static_cast<uint32_t>(block_size_limit);
+  hipOccupancyMaxPotentialBlockSize(&block_count_uint, &thread_per_block_uint,
+                                    func, dynamic_shared_memory_size,
+                                    block_size_limit_uint);
+  block_count = static_cast<int>(block_count_uint);
+  thread_per_block = static_cast<int>(thread_per_block_uint);
 #endif
 
   block_count =
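In short, the change drops the hand-written launch-config heuristic from the ROCm branch and re-enables hipOccupancyMaxPotentialBlockSize now that it is implemented. For reference, the snippet below is a minimal, self-contained sketch (not TensorFlow code) of the heuristic being removed: DivUp mirrors TensorFlow's helper, while FakeDevice and the device numbers in main are made-up stand-ins for what Eigen::GpuDevice would report.

  #include <algorithm>
  #include <cstdio>

  // Same rounding-up integer division as TensorFlow's DivUp helper.
  int DivUp(int a, int b) { return (a + b - 1) / b; }

  // Hypothetical device description; the real code reads these values from
  // Eigen::GpuDevice (d.getNumGpuMultiProcessors(), etc.).
  struct FakeDevice {
    int num_multiprocessors;
    int max_threads_per_multiprocessor;
    int max_threads_per_block;
  };

  // The heuristic removed by this change: assume a simple, memory-bound
  // kernel, use up to 1024 threads per workgroup, and never launch more
  // blocks than there are compute units.
  void HeuristicLaunchConfig(int work_element_count, const FakeDevice& d,
                             int* block_count, int* thread_per_block) {
    const int physical_thread_count =
        std::min(d.num_multiprocessors * d.max_threads_per_multiprocessor,
                 work_element_count);
    *thread_per_block = std::min(1024, d.max_threads_per_block);
    *block_count = std::min(DivUp(physical_thread_count, *thread_per_block),
                            d.num_multiprocessors);
  }

  int main() {
    // Made-up GPU: 60 compute units, 2560 resident threads per compute unit,
    // at most 1024 threads per block.
    const FakeDevice d{60, 2560, 1024};
    int block_count = 0;
    int thread_per_block = 0;
    HeuristicLaunchConfig(/*work_element_count=*/1 << 20, d, &block_count,
                          &thread_per_block);
    // 2^20 elements -> capped at 153600 physical threads -> 150 blocks of
    // 1024 threads, capped again at the 60 compute units, so this prints
    // "blocks=60 threads=1024".
    std::printf("blocks=%d threads=%d\n", block_count, thread_per_block);
    return 0;
  }

The replacement path lets HIP pick the occupancy-optimal configuration instead. Because the ROCm interface takes unsigned int outputs (and, per the retained comment, earlier HIP versions incorrectly returned void, so the hipError_t check stays disabled for now), the new code routes the results through uint32_t temporaries, guards the int-to-unsigned conversion of block_size_limit with CHECK_GE, and casts the outputs back to int.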