From b187ba0bcc04d471ee7cd60aaddbcdfc892e24c6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 14 May 2020 01:46:54 -0700
Subject: [PATCH] Integrate LLVM at https://github.com/llvm/llvm-project/commit/bfa200ebcf37

PiperOrigin-RevId: 311490759
Change-Id: Icd37195b07135947a26f185a8d2a1ddc1adf718c
---
 .../mlir/xla/tests/lhlo-fuse-linalg.mlir      | 52 +++++++++----------
 .../lhlo-legalize-select-and-scatter.mlir     | 34 ++++++------
 .../mlir/xla/tests/lhlo-legalize-to-gpu.mlir  |  2 +-
 .../lhlo-legalize-to-parallel-loops.mlir      | 50 +++++++++---------
 .../lhlo_legalize_to_parallel_loops.cc        | 48 ++++++++---------
 .../compiler/mlir/xla/transforms/passes.h     |  4 +-
 6 files changed, 95 insertions(+), 95 deletions(-)

diff --git a/tensorflow/compiler/mlir/xla/tests/lhlo-fuse-linalg.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-fuse-linalg.mlir
index 013748fea28..99b1766e73c 100644
--- a/tensorflow/compiler/mlir/xla/tests/lhlo-fuse-linalg.mlir
+++ b/tensorflow/compiler/mlir/xla/tests/lhlo-fuse-linalg.mlir
@@ -24,9 +24,9 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>,
 // CHECK-LABEL: func @fusion
 // CHECK: %[[C1:.*]] = constant 1
 // CHECK-NOT: linalg.generic
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK-NOT: loop.for
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK-NOT: scf.for
 // CHECK: linalg.generic
 // CHECK: addf
 // CHECK: linalg.generic
@@ -36,9 +36,9 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>,
 // TILED-DAG: %[[C2:.*]] = constant 2
 // TILED-DAG: %[[C3:.*]] = constant 3
 // TILED-NOT: linalg.generic
-// TILED: loop.for {{.*}} step %[[C2]]
-// TILED: loop.for {{.*}} step %[[C3]]
-// TILED-NOT: loop.for
+// TILED: scf.for {{.*}} step %[[C2]]
+// TILED: scf.for {{.*}} step %[[C3]]
+// TILED-NOT: scf.for
 // TILED: linalg.generic
 // TILED: addf
 // TILED: linalg.generic
@@ -46,8 +46,8 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>,
 // PLOOP-LABEL: func @fusion
 // PLOOP-NOT: linalg.generic
-// PLOOP: loop.parallel
-// PLOOP-NOT: loop.parallel
+// PLOOP: scf.parallel
+// PLOOP-NOT: scf.parallel
 // PLOOP: linalg.generic
 // PLOOP: addf
 // PLOOP: linalg.generic
@@ -94,9 +94,9 @@ func @fusion_of_three(%arg0: memref<100x10xf32>,
 // CHECK-LABEL: func @fusion
 // CHECK: %[[C1:.*]] = constant 1
 // CHECK-NOT: linalg.generic
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK-NOT: loop.for
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK-NOT: scf.for
 // CHECK: linalg.generic
 // CHECK: linalg.generic
 // CHECK: subf
@@ -107,9 +107,9 @@ func @fusion_of_three(%arg0: memref<100x10xf32>,
 // TILED-DAG: %[[C2:.*]] = constant 2
 // TILED-DAG: %[[C3:.*]] = constant 3
 // TILED-NOT: linalg.generic
-// TILED: loop.for {{.*}} step %[[C2]]
-// TILED: loop.for {{.*}} step %[[C3]]
-// TILED-NOT: loop.for
+// TILED: scf.for {{.*}} step %[[C2]]
+// TILED: scf.for {{.*}} step %[[C3]]
+// TILED-NOT: scf.for
 // TILED: linalg.generic
 // TILED: linalg.generic
 // TILED: subf
@@ -118,8 +118,8 @@ func @fusion_of_three(%arg0: memref<100x10xf32>,
 // PLOOP-LABEL: func @fusion_of_three
 // PLOOP-NOT: linalg.generic
-// PLOOP: loop.parallel
-// PLOOP-NOT: loop.parallel
+// PLOOP: scf.parallel
+// PLOOP-NOT: scf.parallel
 // PLOOP: linalg.generic
 // PLOOP: linalg.generic
 // PLOOP: subf
@@ -147,11 +147,11 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32
 // CHECK-LABEL: func @fusion_4d
 // CHECK: %[[C1:.*]] = constant 1
 // CHECK-NOT: linalg.generic
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK-NOT: loop.for
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK-NOT: scf.for
 // CHECK: linalg.generic
 // CHECK: addf
 // CHECK: linalg.generic
@@ -161,9 +161,9 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32
 // TILED-DAG: %[[C2:.*]] = constant 2
 // TILED-DAG: %[[C3:.*]] = constant 3
 // TILED-NOT: linalg.generic
-// TILED: loop.for {{.*}} step %[[C2]]
-// TILED: loop.for {{.*}} step %[[C3]]
-// TILED-NOT: loop.for
+// TILED: scf.for {{.*}} step %[[C2]]
+// TILED: scf.for {{.*}} step %[[C3]]
+// TILED-NOT: scf.for
 // TILED: linalg.generic
 // TILED: addf
 // TILED: linalg.generic
@@ -171,8 +171,8 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32
 // PLOOP-LABEL: func @fusion_4d
 // PLOOP-NOT: linalg.generic
-// PLOOP: loop.parallel
-// PLOOP-NOT: loop.parallel
+// PLOOP: scf.parallel
+// PLOOP-NOT: scf.parallel
 // PLOOP: linalg.generic
 // PLOOP: addf
 // PLOOP: linalg.generic
diff --git a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-select-and-scatter.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-select-and-scatter.mlir
index 5b763cde2ed..c640b395f4d 100644
--- a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-select-and-scatter.mlir
+++ b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-select-and-scatter.mlir
@@ -50,19 +50,19 @@ func @select_and_scatter(%arg: memref<112x112xf32>,

 // Parallel loop to initialize the output buffer.
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]][] : memref<f32>
-// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C112]], [[C112]]) step ([[C1]], [[C1]]) {
 // CHECK: store [[INIT]], [[RESULT_BUF]]{{\[}}[[I]], [[J]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }

 // Parallel loop over source buffer to compute scattered values.
-// CHECK: loop.parallel ([[II:%.*]], [[JJ:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[II:%.*]], [[JJ:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C56]], [[C56]]) step ([[C1]], [[C1]]) {

 // Window loop w.r.t. first dim.
 // CHECK: [[SEL_RES_I:%.*]]:4
-// CHECK-SAME: = loop.for [[WIN_I:%.*]] = [[C0]] to [[C3]] step [[C1]]
+// CHECK-SAME: = scf.for [[WIN_I:%.*]] = [[C0]] to [[C3]] step [[C1]]
 // CHECK-SAME: iter_args(
 // CHECK-SAME: [[SEL_I_0:%.*]] = [[C0]], [[SEL_J_0:%.*]] = [[C0]],
 // CHECK-SAME: [[SEL_VAL_0:%.*]] = [[C0_F32]],
@@ -71,7 +71,7 @@ func @select_and_scatter(%arg: memref<112x112xf32>,

 // Window loop w.r.t. second dim.
 // CHECK: [[SEL_RES_J:%.*]]:4
-// CHECK-SAME: = loop.for [[WIN_J:%.*]] = [[C0]] to [[C3]] step [[C1]]
+// CHECK-SAME: = scf.for [[WIN_J:%.*]] = [[C0]] to [[C3]] step [[C1]]
 // CHECK-SAME: iter_args(
 // CHECK-SAME: [[SEL_I:%.*]] = [[SEL_I_0]], [[SEL_J:%.*]] = [[SEL_J_0]],
 // CHECK-SAME: [[SEL_VAL:%.*]] = [[SEL_VAL_0]],
@@ -102,14 +102,14 @@ func @select_and_scatter(%arg: memref<112x112xf32>,
 // be applied, current selected ivs (SEL_I, SEL_J) and value (SEL_VAL) are
 // returned in that case.
 // CHECK: [[IF_INBOUNDS_RES:%.*]]:4
-// CHECK-SAME: = loop.if [[INBOUNDS_1]] -> (index, index, f32, i1) {
+// CHECK-SAME: = scf.if [[INBOUNDS_1]] -> (index, index, f32, i1) {

 // INBOUNDS-THEN-BODY, i.e. if INBOUNDS == true
 // CHECK: [[ARG_ELEM:%.*]] = load [[ARG_BUF]]{{\[}}[[ARG_I]], [[ARG_J]]]
 // CHECK: [[IF_INIT_RES:%.*]]:4
- // CHECK-SAME: = loop.if [[SEL_INIT]] -> (index, index, f32, i1) {
+ // CHECK-SAME: = scf.if [[SEL_INIT]] -> (index, index, f32, i1) {

 // INIT-THEN-BODY, i.e. INBOUNDS == true and INIT = true
@@ -133,40 +133,40 @@ func @select_and_scatter(%arg: memref<112x112xf32>,

 // Depending on PRED, return ARG ivs & elem or current select ivs and value.
- // CHECK: [[IF_PRED_RES:%.*]]:4 = loop.if [[PRED]]
- // CHECK: loop.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]], [[CTRUE]]
+ // CHECK: [[IF_PRED_RES:%.*]]:4 = scf.if [[PRED]]
+ // CHECK: scf.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]], [[CTRUE]]
 // CHECK: } else {
- // CHECK: loop.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]], [[SEL_INIT]]
+ // CHECK: scf.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]], [[SEL_INIT]]
 // CHECK: }

 // INIT-THEN-BODY yield.
- // CHECK: loop.yield [[IF_PRED_RES]]#0, [[IF_PRED_RES]]#1,
+ // CHECK: scf.yield [[IF_PRED_RES]]#0, [[IF_PRED_RES]]#1,
 // CHECK-SAME: [[IF_PRED_RES]]#2, [[IF_PRED_RES]]#3

 // INIT-ELSE-BODY, i.e. if INBOUNDS == TRUE and INIT == FALSE, returns ARG
 // ivs and element without computing Select function.
- // CHECK: loop.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]],
+ // CHECK: scf.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]],
 // CHECK-SAME: [[CTRUE]] : index, index, f32, i1
 // CHECK: }

 // INBOUNDS-THEN-BODY yield.
- // CHECK: loop.yield [[IF_INIT_RES]]#0, [[IF_INIT_RES]]#1, [[IF_INIT_RES]]#2,
+ // CHECK: scf.yield [[IF_INIT_RES]]#0, [[IF_INIT_RES]]#1, [[IF_INIT_RES]]#2,
 // CHECK-SAME: [[IF_INIT_RES]]#3 : index, index, f32, i1
 // CHECK: }

 // INBOUNDS-ELSE-REGION, i.e. if INBOUNDS == FALSE
 // We are in the pad area, return current iter_args.
- // CHECK: loop.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]],
+ // CHECK: scf.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]],
 // CHECK-SAME: [[SEL_INIT]] : index, index, f32, i1
 // CHECK: }

 // Window loop w.r.t. second dim yield.
-// CHECK: loop.yield [[IF_INBOUNDS_RES]]#0, [[IF_INBOUNDS_RES]]#1,
+// CHECK: scf.yield [[IF_INBOUNDS_RES]]#0, [[IF_INBOUNDS_RES]]#1,
 // CHECK-SAME: [[IF_INBOUNDS_RES]]#2, [[IF_INBOUNDS_RES]]#3
 // CHECK: }

 // Window loop w.r.t. first dim yield.
-// CHECK: loop.yield [[SEL_RES_J]]#0, [[SEL_RES_J]]#1, [[SEL_RES_J]]#2,
+// CHECK: scf.yield [[SEL_RES_J]]#0, [[SEL_RES_J]]#1, [[SEL_RES_J]]#2,
 // CHECK-SAME: [[SEL_RES_J]]#3 : index, index, f32, i1
 // CHECK: }
@@ -196,4 +196,4 @@ func @select_and_scatter(%arg: memref<112x112xf32>,
 // CHECK: atomic_yield [[RES]] : f32

 // Parallel loop over source buffer yield
-// CHECK: loop.yield
+// CHECK: scf.yield
diff --git a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-gpu.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-gpu.mlir
index 4d878cee6f4..16ffbf241b0 100644
--- a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-gpu.mlir
+++ b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-gpu.mlir
@@ -22,7 +22,7 @@ func @reduce(%arg: memref<100x10xf32>,
 // CHECK-DAG: %[[LB:.*]] = constant 0 : index
 // CHECK-DAG: %[[UB:.*]] = constant 10 : index
 // CHECK-DAG: %[[STEP:.*]] = constant 1 : index
-// CHECK: loop.for %[[IDX1:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
+// CHECK: scf.for %[[IDX1:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
 // CHECK: %[[LHS:.*]] = linalg.slice %[[ARG2]][%[[IDX]]] : memref<100xf32>, index, memref<f32>
 // CHECK: %[[RHS:.*]] = linalg.slice %[[ARG0]][%[[IDX]], %[[IDX1]]] : memref<100x10xf32>, index, index, memref<f32>
 // CHECK: "xla_lhlo.add"(%[[LHS]], %[[RHS]], %[[LHS]]) : (memref<f32>, memref<f32>, memref<f32>) -> ()
diff --git a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-parallel-loops.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-parallel-loops.mlir
index cb169e060ef..32c367f97d6 100644
--- a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-parallel-loops.mlir
+++ b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-parallel-loops.mlir
@@ -22,13 +22,13 @@ func @reduce(%arg: memref<100x10x5xf32>,
 // CHECK-DAG: [[C10:%.*]] = constant 10 : index
 // CHECK-DAG: [[C100:%.*]] = constant 100 : index
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]]
-// CHECK: loop.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C100]], [[C5]]) step ([[C1]], [[C1]]) {
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[J:%.*]]) =
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[J:%.*]]) =
 // CHECK-SAME: ([[C0]]) to ([[C10]]) step ([[C1]]) init ([[INIT]]) -> f32 {
 // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]]
 // CHECK-SAME: {{\[}}[[I]], [[J]], [[K]]] : memref<100x10x5xf32>
-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -37,12 +37,12 @@ func @reduce(%arg: memref<100x10x5xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]] : f32
+// CHECK: scf.reduce.return [[ACC_RESULT]] : f32
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[K]]]
-// CHECK: loop.yield
+// CHECK: scf.yield

 // -----

@@ -66,10 +66,10 @@ func @reduce_no_outer_loop(%arg: memref<100xf32>,
 // CHECK-DAG: [[C1:%.*]] = constant 1 : index
 // CHECK-DAG: [[C100:%.*]] = constant 100 : index
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]]
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[I:%.*]]) = ([[C0]])
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[I:%.*]]) = ([[C0]])
 // CHECK-SAME: to ([[C100]]) step ([[C1]]) init ([[INIT]]) -> f32 {
 // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]]{{\[}}[[I]]{{\]}}
-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -78,9 +78,9 @@ func @reduce_no_outer_loop(%arg: memref<100xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]]
+// CHECK: scf.reduce.return [[ACC_RESULT]]
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[C0]]]

 // -----

@@ -107,13 +107,13 @@ func @dynamic_reduce(%arg: memref<?x?x?xf32>,
 // CHECK: [[DIM1:%.*]] = dim [[ARG_BUF]], 1 : memref<?x?x?xf32>
 // CHECK: [[DIM2:%.*]] = dim [[ARG_BUF]], 2 : memref<?x?x?xf32>
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]]
-// CHECK: loop.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[DIM0]], [[DIM2]]) step ([[C1]], [[C1]]) {
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[J:%.*]]) =
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[J:%.*]]) =
 // CHECK-SAME: ([[C0]]) to ([[DIM1]]) step ([[C1]]) init ([[INIT]]) -> f32 {
 // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]]
 // CHECK-SAME: {{\[}}[[I]], [[J]], [[K]]] : memref<?x?x?xf32>
-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -122,12 +122,12 @@ func @dynamic_reduce(%arg: memref<?x?x?xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]] : f32
+// CHECK: scf.reduce.return [[ACC_RESULT]] : f32
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[K]]]
-// CHECK: loop.yield
+// CHECK: scf.yield

 // -----

@@ -158,9 +158,9 @@ func @reduce_window(%arg: memref<112x112xf32>,
 // CHECK-DAG: [[C56:%.*]] = constant 56 : index
 // CHECK-DAG: [[C112:%.*]] = constant 112 : index
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]][] : memref<f32>
-// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C56]], [[C56]]) step ([[C1]], [[C1]]) {
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel
 // CHECK-SAME: ([[IW:%.*]], [[JW:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C3]], [[C3]]) step ([[C1]], [[C1]])
 // CHECK-SAME: init ([[INIT]]) -> f32 {
@@ -177,15 +177,15 @@ func @reduce_window(%arg: memref<112x112xf32>,
 // CHECK: [[INDEX_J_FITS:%.*]] = cmpi "ult", [[INDEX_J]], [[C112]]
 // CHECK: [[IN_BOUNDS_1:%.*]] = and [[IN_BOUNDS_0]], [[INDEX_J_FITS]]

-// CHECK: [[ELEM_TO_REDUCE:%.*]] = loop.if [[IN_BOUNDS_1]] -> (f32) {
+// CHECK: [[ELEM_TO_REDUCE:%.*]] = scf.if [[IN_BOUNDS_1]] -> (f32) {
 // CHECK: [[OPERAND_ELEM:%.*]] =
 // CHECK-SAME: load [[OPERAND_BUF]]{{\[}}[[INDEX_I]], [[INDEX_J]]]
-// CHECK: loop.yield [[OPERAND_ELEM]] : f32
+// CHECK: scf.yield [[OPERAND_ELEM]] : f32
 // CHECK: } else {
-// CHECK: loop.yield [[INIT]] : f32
+// CHECK: scf.yield [[INIT]] : f32
 // CHECK: }

-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -194,12 +194,12 @@ func @reduce_window(%arg: memref<112x112xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.maximum"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]] : f32
+// CHECK: scf.reduce.return [[ACC_RESULT]] : f32
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[J]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: return
 // CHECK: }
diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc
index c5f5b39e04c..734a75a4307 100644
--- a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc
+++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_parallel_loops.cc
@@ -61,8 +61,8 @@ Value ApplySingleResultLhloCode(Location loc, ValueRange operands,
 // Converts a block with LHLO ops and with signature:
 // ^bb(%lhs: memref<f32>, %rhs: memref<f32>, %res: memref<f32>):
-// into a reduction operator of loop.reduce by doing buffer allocation for
-// scalar arguments and the result of `loop.reduce` to make it compatible with
+// into a reduction operator of scf.reduce by doing buffer allocation for
+// scalar arguments and the result of `scf.reduce` to make it compatible with
 // LHLO ops.
 void ConvertToReductionOperator(Location loc, scf::ReduceOp reduce_op,
                                 Block* lhlo_block, OpBuilder* b) {
@@ -170,10 +170,10 @@ scf::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value,
 // is roughly converted into:
 //
 //  %init = load %init_buf[] : memref<f32>
-//  loop.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
-//    %result = loop.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
+//  scf.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
+//    %result = scf.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
 //      %elem_to_reduce = load %buffer[%i, %j, %k] : memref<100x10x5xf32>
-//      loop.reduce(%elem_to_reduce) {
+//      scf.reduce(%elem_to_reduce) {
 //        ^bb0(%elem: f32, %acc: f32):   // no predecessors
 //          elem_buf = alloc() : memref<f32>
 //          store %elem, elem_buf[] : memref<f32>
 //          acc_buf = alloc() : memref<f32>
 //          store %acc, acc_buf[] : memref<f32>
 //
 //          %acc_result = load acc_buf[] : memref<f32>
-//          loop.reduce.return %acc_result : f32
+//          scf.reduce.return %acc_result : f32
 //        } : f32
-//        loop.yield
+//        scf.yield
 //      } : f32
-//      loop.yield
+//      scf.yield
 //  }
 class ReduceOpConverter : public OpConversionPattern<xla_lhlo::ReduceOp> {
  public:
@@ -206,24 +206,24 @@ class ReduceOpConverter : public OpConversionPattern<xla_lhlo::ReduceOp> {
   }

  private:
-  // Creates nested `loop.parallel` ops with `loop.reduce`. The outer ParallelOp
+  // Creates nested `scf.parallel` ops with `scf.reduce`. The outer ParallelOp
   // refers to the parallel dimensions of `xla_reduce_op` if any and the inner
-  // ParallelOp refers to the reduction dimensions. The loop.reduce op is
+  // ParallelOp refers to the reduction dimensions. The scf.reduce op is
   // returned.
  //
  // If the reduction argument is a memref<100x10x5xf32> and the
  // reduction is performed along dimension 1 then this method will generate
  //
  //  %init = load %init_buf[] : memref<f32>
-  //  loop.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
-  //    %result = loop.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
+  //  scf.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
+  //    %result = scf.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
  //      %elem_to_reduce = load %buffer[%i, %j, %k] : memref<100x10x5xf32>
-  //      loop.reduce(%elem_to_reduce) {
+  //      scf.reduce(%elem_to_reduce) {
  //
  //      } : f32
-  //      loop.yield
+  //      scf.yield
  //    } : f32
-  //    loop.yield
+  //    scf.yield
  //  }
  scf::ReduceOp CreateReduceOpInNestedParallelLoops(
      xla_lhlo::ReduceOp xla_reduce_op,
@@ -341,20 +341,20 @@ class ReduceOpConverter : public OpConversionPattern<xla_lhlo::ReduceOp> {
 // is roughly converted into:
 //
 // %neutral_elem = load %init_buf[] : memref<f32>
-// loop.parallel (%i, %j) = (%c0, %c0) to (%c56, %c56) step (%c1, %c1) {
-//   %result = loop.parallel (%iw, %jw) = (%c0, %c0)
+// scf.parallel (%i, %j) = (%c0, %c0) to (%c56, %c56) step (%c1, %c1) {
+//   %result = scf.parallel (%iw, %jw) = (%c0, %c0)
 //       to (%c3, %c3) step (%c1, %c1) neutral_elem (%0) -> f32 {
 //     %in_bounds =
 //     %elem = load %operand[%computed_i, %computed_j]
 //     %elem_or_neutral = select %in_bounds, %elem, %neutral_elem : f32
-//     loop.reduce(%elem_to_reduce) : f32 {
+//     scf.reduce(%elem_to_reduce) : f32 {
 //       ^bb0(%arg7: f32, %arg8: f32):
 //
 //     }
-//     loop.yield
+//     scf.yield
 //   }
 //   store %result, %output_buffer[%i, %j] : memref<56x56xf32>
-//   loop.yield
+//   scf.yield
 // }
 // return
 // }
@@ -457,16 +457,16 @@ class ReduceWindowOpConverter
 // https://www.tensorflow.org/xla/operation_semantics#selectandscatter
 //
 // Pseudocode:
-// loop.parallel(coordinates O in the output):
+// scf.parallel(coordinates O in the output):
 //   output[O] = init
-// loop.parallel(coordinates S in the source):
+// scf.parallel(coordinates S in the source):
 //   selected_ivs = 0
 //   selected_val = 0
 //   initialized_flag = false
-//   loop.for (first dim W_1 in the window)
+//   scf.for (first dim W_1 in the window)
 //       iter_args (selected_ivs, selected_val, initialized_flag):
 //     ...
-//     loop.for (last dim W_N in the window):
+//     scf.for (last dim W_N in the window):
 //       iter_args (selected_ivs, selected_val, initialized_flag):
 //       I = S * stride + W - pad_low
 //       if I within bounds of operand:
diff --git a/tensorflow/compiler/mlir/xla/transforms/passes.h b/tensorflow/compiler/mlir/xla/transforms/passes.h
index 2d0164981a3..39375e210d5 100644
--- a/tensorflow/compiler/mlir/xla/transforms/passes.h
+++ b/tensorflow/compiler/mlir/xla/transforms/passes.h
@@ -81,8 +81,8 @@ std::unique_ptr<OperationPass<FuncOp>> createLegalizeToGpuPass();
 // Fuses linalg ops obtained after LHLO lowering. To enable fusion,
 // operations are first tiled.
 //
-// When 'use_parallel_loops' is set, the tiling will use loop.parallel
-// operations. Otherwise, loop.for operations are used.
+// When 'use_parallel_loops' is set, the tiling will use scf.parallel
+// operations. Otherwise, scf.for operations are used.
 //
 // 'tile_sizes' provides the tile sizes to use for tiling. If the linalg
 // operation has more dimensions than tile sizes provided, 1 is used as
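
Editor's note, not part of the patch: every hunk above is the mechanical loop.* -> scf.* rename that accompanied upstream MLIR's move of the loop dialect into the scf (structured control flow) dialect. As a reading aid, here is a minimal standalone sketch of the loop-nest shape the updated CHECK patterns describe, for a 100x10 -> 100 row-sum reduction. The function name and shapes are illustrative (they do not appear in the patch); the op spellings (scf.parallel/scf.reduce plus the then-current std-dialect constant/load/store) follow the tests above at this LLVM revision.

func @row_sum(%arg: memref<100x10xf32>, %out: memref<100xf32>, %init: f32) {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c10 = constant 10 : index
  %c100 = constant 100 : index
  // One parallel iteration per output element.
  scf.parallel (%i) = (%c0) to (%c100) step (%c1) {
    // The inner parallel loop carries the running value declared by `init`.
    %sum = scf.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) -> f32 {
      %elem = load %arg[%i, %j] : memref<100x10xf32>
      // scf.reduce declares how two partial values combine.
      scf.reduce(%elem) : f32 {
      ^bb0(%lhs: f32, %rhs: f32):
        %0 = addf %lhs, %rhs : f32
        scf.reduce.return %0 : f32
      }
      scf.yield
    }
    store %sum, %out[%i] : memref<100xf32>
    scf.yield
  }
  return
}

This is exactly the structure the lhlo-legalize-to-parallel-loops.mlir CHECK lines assert, minus the scalar memref<f32> staging buffers that the LHLO reduction body requires.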