Move layout assignment before instruction fusion for the CPU backend.

This means we will also have layouts inside fusion nodes, which in turn may
enable us to compute linear index values for array accesses and reuse them.
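
To illustrate the reuse the message alludes to, here is a standalone sketch
(not code from this commit; all names below are hypothetical): once an
operand's layout is fixed, the linear offset of an element follows from the
layout's minor-to-major order, and accesses that share a shape and layout can
share that offset computation.

// Minimal standalone sketch, not XLA code. Shows how a known physical
// layout turns a multi-dimensional index into a linear offset that two
// operands with the same shape and layout can reuse.
#include <cstdint>
#include <vector>

// Physical dimension order, most-minor dimension first (XLA-style
// minor_to_major), e.g. {1, 0} for a row-major 2-D array.
struct Layout {
  std::vector<int64_t> minor_to_major;
};

// Computes the linear offset of `index` into an array with logical
// dimensions `dims` laid out according to `layout`.
int64_t LinearIndex(const std::vector<int64_t>& dims, const Layout& layout,
                    const std::vector<int64_t>& index) {
  int64_t linear = 0;
  int64_t stride = 1;
  for (int64_t dim : layout.minor_to_major) {
    linear += index[dim] * stride;
    stride *= dims[dim];
  }
  return linear;
}

// Two element reads with the same shape and layout can share one linear
// index instead of recomputing it per operand.
float FusedAdd(const float* a, const float* b,
               const std::vector<int64_t>& dims, const Layout& layout,
               const std::vector<int64_t>& index) {
  int64_t linear = LinearIndex(dims, layout, index);  // computed once
  return a[linear] + b[linear];                       // reused twice
}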

PiperOrigin-RevId: 236815199
Adrian Kuegel 2019-03-05 02:48:02 -08:00 committed by TensorFlower Gardener
parent 55cd3abd02
commit b0315f0960


@@ -318,6 +318,11 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
       },
       TransposeFolding::NeverFoldTranspose);
   pipeline.AddPass<HloCSE>(/*is_layout_sensitive=*/false);
+  pipeline.AddPass<CpuLayoutAssignment>(
+      module->mutable_entry_computation_layout(),
+      LayoutAssignment::InstructionCanChangeLayout, target_machine_features);
   pipeline.AddPass<CpuInstructionFusion>();
   pipeline.AddPass<ScatterExpander>();
@@ -325,10 +330,6 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
   ReducePrecisionInsertion::AddPasses(
       &pipeline, module->config().debug_options(),
       ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
-  pipeline.AddPass<CpuLayoutAssignment>(
-      module->mutable_entry_computation_layout(),
-      LayoutAssignment::InstructionCanChangeLayout, target_machine_features);
   return pipeline.Run(module).status();
 }
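
Condensed from the two hunks above, the resulting order of the passes shown
in RunHloPassesThroughLayoutAssn is roughly the following (passes that fall
between the hunks are elided):

  pipeline.AddPass<HloCSE>(/*is_layout_sensitive=*/false);
  pipeline.AddPass<CpuLayoutAssignment>(
      module->mutable_entry_computation_layout(),
      LayoutAssignment::InstructionCanChangeLayout, target_machine_features);
  pipeline.AddPass<CpuInstructionFusion>();
  pipeline.AddPass<ScatterExpander>();
  // ... (intervening passes not shown in the hunks)
  ReducePrecisionInsertion::AddPasses(
      &pipeline, module->config().debug_options(),
      ReducePrecisionInsertion::PassTiming::AFTER_FUSION);
  return pipeline.Run(module).status();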