[XLA:GPU] layout_assignment: Force the default layout for FFT

cuFFT wants a dim0-major layout; otherwise XLA:GPU hits a check failure later.

PiperOrigin-RevId: 240186519
This commit is contained in:
Benjamin Kramer 2019-03-25 11:46:10 -07:00 committed by TensorFlower Gardener
parent 9bb938c6f9
commit f9cda530e4
4 changed files with 41 additions and 1 deletions

View File

@ -214,6 +214,16 @@ Status GpuLayoutAssignment::AddBackendConstraints(
constraints->SetOperandLayout(op1_shape, instruction, 1));
TF_RETURN_IF_ERROR(
constraints->SetInstructionLayout(output_shape, instruction));
} else if (instruction->opcode() == HloOpcode::kFft) {
// cuFFT requires a dim0 major layout.
Shape op0_shape = instruction->operand(0)->shape();
LayoutUtil::SetToDefaultLayout(&op0_shape);
Shape output_shape = instruction->shape();
LayoutUtil::SetToDefaultLayout(&output_shape);
TF_RETURN_IF_ERROR(
constraints->SetOperandLayout(op0_shape, instruction, 0));
TF_RETURN_IF_ERROR(
constraints->SetInstructionLayout(output_shape, instruction));
} else if (instruction->opcode() == HloOpcode::kSort &&
instruction->operand(0)->shape().rank() > 1) {
// Make sure that all the operands and the output(s) have the same layout.

View File

@ -402,6 +402,35 @@ TEST_F(LayoutAssignmentTest, SortLayout) {
op::ShapeWithLayout(expected_shape)));
}
// cuFFT only accepts dim0-major ({1,0}) layouts, so the layout-assignment
// pass must force the default layout onto an fft's operand and result even
// when the surrounding graph (here: a {0,1}-laid-out parameter followed by a
// transpose) would prefer something else.
TEST_F(LayoutAssignmentTest, FftLayout) {
  const char* module_text = R"(
HloModule Fft_module
ENTRY Fft {
input = c64[8,32]{0,1} parameter(0)
fft = c64[8,32] fft(input), fft_type=FFT, fft_length={32}
ROOT transpose = c64[32,8] transpose(fft), dimensions={1,0}
})";

  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> parsed_module,
                          ParseHloString(module_text));

  ComputationLayout entry_layout(
      parsed_module->entry_computation()->ComputeProgramShape(),
      /*ignore_layouts=*/false);

  GpuLayoutAssignment assignment(
      &entry_layout, LayoutAssignment::InstructionCanChangeLayout,
      backend().default_stream_executor());
  EXPECT_TRUE(assignment.Run(parsed_module.get()).ValueOrDie());

  // Both the fft instruction itself and its operand (the transpose's input)
  // must end up with the default dim0-major layout.
  const Shape dim0_major_shape =
      ShapeUtil::MakeShapeWithLayout(C64, {8, 32}, {1, 0});
  const auto* root = parsed_module->entry_computation()->root_instruction();
  EXPECT_THAT(root,
              op::Copy(op::Transpose(op::ShapeWithLayout(dim0_major_shape))));
  EXPECT_THAT(root, op::Copy(op::Transpose(
                        op::Fft(op::ShapeWithLayout(dim0_major_shape)))));
}
} // namespace
} // namespace gpu
} // namespace xla

View File

@ -201,6 +201,7 @@ HLO_MATCHER(Domain);
HLO_MATCHER(DynamicSlice);
HLO_MATCHER(DynamicUpdateSlice);
HLO_MATCHER(Exp);
HLO_MATCHER(Fft);
HLO_MATCHER(Floor);
HLO_MATCHER(Fusion);
HLO_MATCHER(AfterAll);

View File

@ -150,7 +150,7 @@ cuda_py_test(
"noasan", # times out, b/63678675
"optonly", # times out, b/79171797
],
xla_enable_strict_auto_jit = False,
xla_enable_strict_auto_jit = True,
)
cuda_py_test(