From ee4657facfb5805c85a7fa0d68493e642a22e51b Mon Sep 17 00:00:00 2001
From: Justin Lebar
Date: Sun, 19 May 2019 11:44:16 -0700
Subject: [PATCH] [XLA:CPU] Run ScatterExpander much earlier in CPU pipeline.

Before, the ScatterExpander was run after fusion (!), meaning that
nothing it emitted would ever be fused.

On my machine, this is good for a 3.2/2.6 = 1.2x speedup on the
testcase from https://github.com/google/jax/issues/695.

PiperOrigin-RevId: 248950865
---
 tensorflow/compiler/xla/service/cpu/cpu_compiler.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index d852d0aae5d..a3e224824ba 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -297,6 +297,7 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
     pass.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/false,
                                           /*allow_mixed_precision=*/false);
 
+    pass.AddPass<ScatterExpander>();
     pass.AddPass<BatchNormExpander>(
         /*rewrite_training_op=*/true,
         /*rewrite_inference_op=*/true,
@@ -340,8 +341,6 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
 
   pipeline.AddPass<CpuInstructionFusion>();
 
-  pipeline.AddPass<ScatterExpander>();
-
   ReducePrecisionInsertion::AddPasses(
       &pipeline, module->config().debug_options(),
       ReducePrecisionInsertion::PassTiming::AFTER_FUSION);