From 62a3d533af6e575d06f2b35cc07507279d41e35d Mon Sep 17 00:00:00 2001
From: Frederic Bastien
Date: Mon, 14 Sep 2020 09:54:12 -0700
Subject: [PATCH] Fix a test on Maxwell GPUs.

---
 .../gpu/tests/reduction_vectorization_test.cc | 40 ++++++++++++-------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/tests/reduction_vectorization_test.cc b/tensorflow/compiler/xla/service/gpu/tests/reduction_vectorization_test.cc
index 215c2e627ae..406af16977d 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/reduction_vectorization_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/tests/reduction_vectorization_test.cc
@@ -336,21 +336,33 @@ ENTRY %cluster {
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr optimized_module,
                           ParseAndReturnVerifiedModule(hlo_text));
 
+  const se::DeviceDescription& device_description =
+      backend().default_stream_executor()->GetDeviceDescription();
+  int cc_major = 0, cc_minor = 0;
+  device_description.cuda_compute_capability(&cc_major, &cc_minor);
+
+  string expected;
+  if (cc_major < 6) {
+    // We do not vectorize for GPU before Pascal.
+    expected = "CHECK-NOT: ld.global.nc.v2.f32";
+  } else {
+    expected = R"(
+CHECK: ld.global.nc.v2.f32
+CHECK: st.global.v2.f32
+CHECK: st.global.v2.f32
+CHECK: ld.global.nc.v2.f32
+CHECK: st.global.v2.f32
+CHECK: st.global.v2.f32
+CHECK: ld.global.nc.v2.f32
+CHECK: st.global.v2.f32
+CHECK: st.global.v2.f32
+CHECK: ld.global.nc.v2.f32
+CHECK: st.global.v2.f32
+CHECK: st.global.v2.f32
+)";
+  }
   CompileAndOptionallyVerifyPtx(std::move(optimized_module),
-                                R"(
-CHECK: ld.global.nc.v2.f32
-CHECK: st.global.v2.f32
-CHECK: st.global.v2.f32
-CHECK: ld.global.nc.v2.f32
-CHECK: st.global.v2.f32
-CHECK: st.global.v2.f32
-CHECK: ld.global.nc.v2.f32
-CHECK: st.global.v2.f32
-CHECK: st.global.v2.f32
-CHECK: ld.global.nc.v2.f32
-CHECK: st.global.v2.f32
-CHECK: st.global.v2.f32
-)");
+                                expected);
 
   EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{1e-5, 1e-5}));
 }