Automated g4 rollback of changelist 172041133
PiperOrigin-RevId: 172129075
This commit is contained in:
parent
6a04ffd447
commit
26cc81e405
@ -154,15 +154,11 @@ struct CudaLaunchConfig {
|
||||
// Calculate the Cuda launch config we should use for a kernel launch.
|
||||
// This is assuming the kernel is quite simple and will largely be
|
||||
// memory-limited.
|
||||
// REQUIRES: work_element_count > 0.
|
||||
inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
|
||||
const GPUDevice& d) {
|
||||
CHECK_GT(work_element_count, 0);
|
||||
CudaLaunchConfig config;
|
||||
|
||||
// In case of invalid input, return the default config, whose fields are all -1.
|
||||
if (work_element_count <= 0) {
|
||||
return config;
|
||||
}
|
||||
|
||||
const int virtual_thread_count = work_element_count;
|
||||
const int physical_thread_count = std::min(
|
||||
d.getNumCudaMultiProcessors() * d.maxCudaThreadsPerMultiProcessor(),
|
||||
@ -180,17 +176,14 @@ inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
|
||||
|
||||
// Calculate the Cuda launch config we should use for a kernel launch. This
|
||||
// variant takes the resource limits of func into account to maximize occupancy.
|
||||
// REQUIRES: work_element_count > 0.
|
||||
template <typename DeviceFunc>
|
||||
inline CudaLaunchConfig GetCudaLaunchConfig(int work_element_count,
|
||||
const GPUDevice& d, DeviceFunc func,
|
||||
size_t dynamic_shared_memory_size,
|
||||
int block_size_limit) {
|
||||
CHECK_GT(work_element_count, 0);
|
||||
CudaLaunchConfig config;
|
||||
|
||||
if (work_element_count <= 0) {
|
||||
return config;
|
||||
}
|
||||
|
||||
int block_count = 0;
|
||||
int thread_per_block = 0;
|
||||
|
||||
|
@ -111,28 +111,6 @@ class CudaLaunchConfigTest : public ::testing::Test {
|
||||
TEST_F(CudaLaunchConfigTest, GetCudaLaunchConfig) {
|
||||
CudaLaunchConfig cfg;
|
||||
|
||||
// test invalid inputs
|
||||
CudaLaunchConfig default_value;
|
||||
cfg = GetCudaLaunchConfig(0, d);
|
||||
EXPECT_EQ(default_value.virtual_thread_count, cfg.virtual_thread_count);
|
||||
EXPECT_EQ(default_value.block_count, cfg.block_count);
|
||||
EXPECT_EQ(default_value.thread_per_block, cfg.thread_per_block);
|
||||
|
||||
cfg = GetCudaLaunchConfig(-1, d);
|
||||
EXPECT_EQ(default_value.virtual_thread_count, cfg.virtual_thread_count);
|
||||
EXPECT_EQ(default_value.block_count, cfg.block_count);
|
||||
EXPECT_EQ(default_value.thread_per_block, cfg.thread_per_block);
|
||||
|
||||
cfg = GetCudaLaunchConfig(0, d, Count1D, 0, 0);
|
||||
EXPECT_EQ(default_value.virtual_thread_count, cfg.virtual_thread_count);
|
||||
EXPECT_EQ(default_value.block_count, cfg.block_count);
|
||||
EXPECT_EQ(default_value.thread_per_block, cfg.thread_per_block);
|
||||
|
||||
cfg = GetCudaLaunchConfig(-1, d, Count1D, 0, 0);
|
||||
EXPECT_EQ(default_value.virtual_thread_count, cfg.virtual_thread_count);
|
||||
EXPECT_EQ(default_value.block_count, cfg.block_count);
|
||||
EXPECT_EQ(default_value.thread_per_block, cfg.thread_per_block);
|
||||
|
||||
// test valid inputs
|
||||
#define TEST_LAUNCH_PARAMETER(work_element_count) \
|
||||
cfg = GetCudaLaunchConfig(bufsize, d); \
|
||||
@ -184,34 +162,6 @@ TEST_F(CudaLaunchConfigTest, GetCuda2DLaunchConfig) {
|
||||
Cuda2DLaunchConfig cfg;
|
||||
CudaLaunchConfig cfg1d;
|
||||
|
||||
// test invalid inputs
|
||||
Cuda2DLaunchConfig default_value;
|
||||
cfg = GetCuda2DLaunchConfig(1, 0, d);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda2DLaunchConfig(1, -1, d);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda2DLaunchConfig(-1, 1, d);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda2DLaunchConfig(-1, 1, d);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda2DLaunchConfig(0, -1, d);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda2DLaunchConfig(0, 0, d);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
|
||||
cfg = GetCuda2DLaunchConfig(1, 0, d, Count2D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda2DLaunchConfig(1, -1, d, Count2D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda2DLaunchConfig(-1, 1, d, Count2D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda2DLaunchConfig(-1, 1, d, Count2D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda2DLaunchConfig(0, -1, d, Count2D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda2DLaunchConfig(0, 0, d, Count2D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
|
||||
// test valid inputs
|
||||
#define TEST_LAUNCH_PARAMETER(dimx, dimy) \
|
||||
cfg1d = GetCudaLaunchConfig(bufsize, d); \
|
||||
@ -252,25 +202,6 @@ TEST_F(CudaLaunchConfigTest, GetCuda3DLaunchConfig) {
|
||||
Cuda3DLaunchConfig cfg;
|
||||
CudaLaunchConfig cfg1d;
|
||||
|
||||
// test invalid inputs
|
||||
Cuda3DLaunchConfig default_value;
|
||||
cfg = GetCuda3DLaunchConfig(0, 1, 1, d, Count3D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda3DLaunchConfig(-1, 1, 1, d, Count3D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda3DLaunchConfig(1, 0, 1, d, Count3D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda3DLaunchConfig(1, -1, 1, d, Count3D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda3DLaunchConfig(1, 1, 0, d, Count3D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda3DLaunchConfig(1, 1, -1, d, Count3D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda3DLaunchConfig(0, 0, 0, d, Count3D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
cfg = GetCuda3DLaunchConfig(-1, -1, -1, d, Count3D, 0, 0);
|
||||
EXPECT_EQ(default_value, cfg);
|
||||
|
||||
// test valid inputs
|
||||
#define TEST_LAUNCH_PARAMETER(dimx, dimy, dimz) \
|
||||
cfg1d = GetCudaLaunchConfig(bufsize, d, SetOutbufZero, 0, 0); \
|
||||
|
@ -647,7 +647,7 @@ def tf_cuda_only_cc_test(name,
|
||||
clean_dep("//tensorflow:darwin"): 1,
|
||||
"//conditions:default": 0,
|
||||
}),
|
||||
tags=tags)
|
||||
tags=tags + tf_cuda_tests_tags())
|
||||
|
||||
# Create a cc_test for each of the tensorflow tests listed in "tests"
|
||||
def tf_cc_tests(srcs,
|
||||
|
Loading…
Reference in New Issue
Block a user