Use ForLoopSpecializationPass to help with vectorization of kernels.

PiperOrigin-RevId: 317843378 Change-Id: I4ead02c24f957269888af5491934567cd3e311fb
2020-06-23 04:56:42 -07:00 · 2020-06-23 04:56:42 -07:00 · af94e801cf
commit af94e801cf
parent 8eea0658d4
1 changed files with 5 additions and 0 deletions
--- a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc
+++ b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc
@@ -505,6 +505,11 @@ Status LowerLHLOToGPU(mlir::ModuleOp module, LowerLHLOToGPUOptions options) {
  // Some basic cleanup.
  pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass());
  pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
+  // Make loops with min bounds into a conditional plus static bounds.
+  // Only do this if we unrolled in the first place.
+  if (!options.unroll_factors.empty()) {
+    pm.addNestedPass<::mlir::FuncOp>(mlir::createForLoopSpecializationPass());
+  }
  // Approximate if requested.
  if (options.use_approximations) {
    pm.addNestedPass<::mlir::FuncOp>(