PiperOrigin-RevId: 311490759
Change-Id: Icd37195b07135947a26f185a8d2a1ddc1adf718c
A. Unique TensorFlower 2020-05-14 01:46:54 -07:00 committed by TensorFlower Gardener
parent ca18db7f3f
commit b187ba0bcc
6 changed files with 95 additions and 95 deletions

View File

@@ -24,9 +24,9 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>,
 // CHECK-LABEL: func @fusion
 // CHECK: %[[C1:.*]] = constant 1
 // CHECK-NOT: linalg.generic
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK-NOT: loop.for
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK-NOT: scf.for
 // CHECK: linalg.generic
 // CHECK: addf
 // CHECK: linalg.generic
@@ -36,9 +36,9 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>,
 // TILED-DAG: %[[C2:.*]] = constant 2
 // TILED-DAG: %[[C3:.*]] = constant 3
 // TILED-NOT: linalg.generic
-// TILED: loop.for {{.*}} step %[[C2]]
-// TILED: loop.for {{.*}} step %[[C3]]
-// TILED-NOT: loop.for
+// TILED: scf.for {{.*}} step %[[C2]]
+// TILED: scf.for {{.*}} step %[[C3]]
+// TILED-NOT: scf.for
 // TILED: linalg.generic
 // TILED: addf
 // TILED: linalg.generic
@@ -46,8 +46,8 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>,
 // PLOOP-LABEL: func @fusion
 // PLOOP-NOT: linalg.generic
-// PLOOP: loop.parallel
-// PLOOP-NOT: loop.parallel
+// PLOOP: scf.parallel
+// PLOOP-NOT: scf.parallel
 // PLOOP: linalg.generic
 // PLOOP: addf
 // PLOOP: linalg.generic
@@ -94,9 +94,9 @@ func @fusion_of_three(%arg0: memref<100x10xf32>,
 // CHECK-LABEL: func @fusion
 // CHECK: %[[C1:.*]] = constant 1
 // CHECK-NOT: linalg.generic
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK-NOT: loop.for
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK-NOT: scf.for
 // CHECK: linalg.generic
 // CHECK: linalg.generic
 // CHECK: subf
@@ -107,9 +107,9 @@ func @fusion_of_three(%arg0: memref<100x10xf32>,
 // TILED-DAG: %[[C2:.*]] = constant 2
 // TILED-DAG: %[[C3:.*]] = constant 3
 // TILED-NOT: linalg.generic
-// TILED: loop.for {{.*}} step %[[C2]]
-// TILED: loop.for {{.*}} step %[[C3]]
-// TILED-NOT: loop.for
+// TILED: scf.for {{.*}} step %[[C2]]
+// TILED: scf.for {{.*}} step %[[C3]]
+// TILED-NOT: scf.for
 // TILED: linalg.generic
 // TILED: linalg.generic
 // TILED: subf
@@ -118,8 +118,8 @@ func @fusion_of_three(%arg0: memref<100x10xf32>,
 // PLOOP-LABEL: func @fusion_of_three
 // PLOOP-NOT: linalg.generic
-// PLOOP: loop.parallel
-// PLOOP-NOT: loop.parallel
+// PLOOP: scf.parallel
+// PLOOP-NOT: scf.parallel
 // PLOOP: linalg.generic
 // PLOOP: linalg.generic
 // PLOOP: subf
@@ -147,11 +147,11 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32
 // CHECK-LABEL: func @fusion_4d
 // CHECK: %[[C1:.*]] = constant 1
 // CHECK-NOT: linalg.generic
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK-NOT: loop.for
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK-NOT: scf.for
 // CHECK: linalg.generic
 // CHECK: addf
 // CHECK: linalg.generic
@@ -161,9 +161,9 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32
 // TILED-DAG: %[[C2:.*]] = constant 2
 // TILED-DAG: %[[C3:.*]] = constant 3
 // TILED-NOT: linalg.generic
-// TILED: loop.for {{.*}} step %[[C2]]
-// TILED: loop.for {{.*}} step %[[C3]]
-// TILED-NOT: loop.for
+// TILED: scf.for {{.*}} step %[[C2]]
+// TILED: scf.for {{.*}} step %[[C3]]
+// TILED-NOT: scf.for
 // TILED: linalg.generic
 // TILED: addf
 // TILED: linalg.generic
@@ -171,8 +171,8 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32
 // PLOOP-LABEL: func @fusion_4d
 // PLOOP-NOT: linalg.generic
-// PLOOP: loop.parallel
-// PLOOP-NOT: loop.parallel
+// PLOOP: scf.parallel
+// PLOOP-NOT: scf.parallel
 // PLOOP: linalg.generic
 // PLOOP: addf
 // PLOOP: linalg.generic
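
For reference, the loop structure these CHECK patterns describe looks roughly as follows. This is a hand-written MLIR sketch with assumed shapes and names, not verbatim pass output:

  func @fusion_sketch(%lhs: memref<6x6xf32>, %rhs: memref<6x6xf32>,
                      %out: memref<6x6xf32>) {
    %c0 = constant 0 : index
    %c1 = constant 1 : index
    %c6 = constant 6 : index
    // Tiling with unit tile sizes yields one scf.for per dimension.
    scf.for %i = %c0 to %c6 step %c1 {
      scf.for %j = %c0 to %c6 step %c1 {
        // The fused linalg.generic ops (whose addf/mulf payloads the
        // CHECK lines match) operate on per-iteration subviews here.
      }
    }
    return
  }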

View File

@@ -50,19 +50,19 @@ func @select_and_scatter(%arg: memref<112x112xf32>,
 // Parallel loop to initialize the output buffer.
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]][] : memref<f32>
-// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C112]], [[C112]]) step ([[C1]], [[C1]]) {
 // CHECK: store [[INIT]], [[RESULT_BUF]]{{\[}}[[I]], [[J]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // Parallel loop over source buffer to compute scattered values.
-// CHECK: loop.parallel ([[II:%.*]], [[JJ:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[II:%.*]], [[JJ:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C56]], [[C56]]) step ([[C1]], [[C1]]) {
 // Window loop w.r.t. first dim.
 // CHECK: [[SEL_RES_I:%.*]]:4
-// CHECK-SAME: = loop.for [[WIN_I:%.*]] = [[C0]] to [[C3]] step [[C1]]
+// CHECK-SAME: = scf.for [[WIN_I:%.*]] = [[C0]] to [[C3]] step [[C1]]
 // CHECK-SAME: iter_args(
 // CHECK-SAME: [[SEL_I_0:%.*]] = [[C0]], [[SEL_J_0:%.*]] = [[C0]],
 // CHECK-SAME: [[SEL_VAL_0:%.*]] = [[C0_F32]],
@@ -71,7 +71,7 @@ func @select_and_scatter(%arg: memref<112x112xf32>,
 // Window loop w.r.t. second dim.
 // CHECK: [[SEL_RES_J:%.*]]:4
-// CHECK-SAME: = loop.for [[WIN_J:%.*]] = [[C0]] to [[C3]] step [[C1]]
+// CHECK-SAME: = scf.for [[WIN_J:%.*]] = [[C0]] to [[C3]] step [[C1]]
 // CHECK-SAME: iter_args(
 // CHECK-SAME: [[SEL_I:%.*]] = [[SEL_I_0]], [[SEL_J:%.*]] = [[SEL_J_0]],
 // CHECK-SAME: [[SEL_VAL:%.*]] = [[SEL_VAL_0]],
@@ -102,14 +102,14 @@ func @select_and_scatter(%arg: memref<112x112xf32>,
 // be applied, current selected ivs (SEL_I, SEL_J) and value (SEL_VAL) are
 // returned in that case.
 // CHECK: [[IF_INBOUNDS_RES:%.*]]:4
-// CHECK-SAME: = loop.if [[INBOUNDS_1]] -> (index, index, f32, i1) {
+// CHECK-SAME: = scf.if [[INBOUNDS_1]] -> (index, index, f32, i1) {
 // INBOUNDS-THEN-BODY, i.e. if INBOUNDS == true
 // CHECK: [[ARG_ELEM:%.*]] = load [[ARG_BUF]]{{\[}}[[ARG_I]], [[ARG_J]]]
 // CHECK: [[IF_INIT_RES:%.*]]:4
-// CHECK-SAME: = loop.if [[SEL_INIT]] -> (index, index, f32, i1) {
+// CHECK-SAME: = scf.if [[SEL_INIT]] -> (index, index, f32, i1) {
 // INIT-THEN-BODY, i.e. INBOUNDS == true and INIT = true
@@ -133,40 +133,40 @@ func @select_and_scatter(%arg: memref<112x112xf32>,
 // Depending on PRED, return ARG ivs & elem or current select ivs and value.
-// CHECK: [[IF_PRED_RES:%.*]]:4 = loop.if [[PRED]]
-// CHECK: loop.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]], [[CTRUE]]
+// CHECK: [[IF_PRED_RES:%.*]]:4 = scf.if [[PRED]]
+// CHECK: scf.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]], [[CTRUE]]
 // CHECK: } else {
-// CHECK: loop.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]], [[SEL_INIT]]
+// CHECK: scf.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]], [[SEL_INIT]]
 // CHECK: }
 // INIT-THEN-BODY yield.
-// CHECK: loop.yield [[IF_PRED_RES]]#0, [[IF_PRED_RES]]#1,
+// CHECK: scf.yield [[IF_PRED_RES]]#0, [[IF_PRED_RES]]#1,
 // CHECK-SAME: [[IF_PRED_RES]]#2, [[IF_PRED_RES]]#3
 // INIT-ELSE-BODY, i.e. if INBOUNDS == TRUE and INIT == FALSE, returns ARG
 // ivs and element without computing Select function.
-// CHECK: loop.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]],
+// CHECK: scf.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]],
 // CHECK-SAME: [[CTRUE]] : index, index, f32, i1
 // CHECK: }
 // INBOUNDS-THEN-BODY yield.
-// CHECK: loop.yield [[IF_INIT_RES]]#0, [[IF_INIT_RES]]#1, [[IF_INIT_RES]]#2,
+// CHECK: scf.yield [[IF_INIT_RES]]#0, [[IF_INIT_RES]]#1, [[IF_INIT_RES]]#2,
 // CHECK-SAME: [[IF_INIT_RES]]#3 : index, index, f32, i1
 // CHECK: }
 // INBOUNDS-ELSE-REGION, i.e. if INBOUNDS == FALSE
 // We are in the pad area, return current iter_args.
-// CHECK: loop.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]],
+// CHECK: scf.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]],
 // CHECK-SAME: [[SEL_INIT]] : index, index, f32, i1
 // CHECK: }
 // Window loop w.r.t. second dim yield.
-// CHECK: loop.yield [[IF_INBOUNDS_RES]]#0, [[IF_INBOUNDS_RES]]#1,
+// CHECK: scf.yield [[IF_INBOUNDS_RES]]#0, [[IF_INBOUNDS_RES]]#1,
 // CHECK-SAME: [[IF_INBOUNDS_RES]]#2, [[IF_INBOUNDS_RES]]#3
 // CHECK: }
 // Window loop w.r.t. first dim yield.
-// CHECK: loop.yield [[SEL_RES_J]]#0, [[SEL_RES_J]]#1, [[SEL_RES_J]]#2,
+// CHECK: scf.yield [[SEL_RES_J]]#0, [[SEL_RES_J]]#1, [[SEL_RES_J]]#2,
 // CHECK-SAME: [[SEL_RES_J]]#3 : index, index, f32, i1
 // CHECK: }
@@ -196,4 +196,4 @@ func @select_and_scatter(%arg: memref<112x112xf32>,
 // CHECK: atomic_yield [[RES]] : f32
 // Parallel loop over source buffer yield
-// CHECK: loop.yield
+// CHECK: scf.yield
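
The two scf constructs this test exercises, scf.for with iter_args and scf.if with results, compose as in this stand-alone sketch (illustrative function and names, not part of the test):

  func @running_max(%buf: memref<3xf32>, %init: f32) -> f32 {
    %c0 = constant 0 : index
    %c1 = constant 1 : index
    %c3 = constant 3 : index
    // %acc is threaded through the loop as an iteration argument.
    %max = scf.for %i = %c0 to %c3 step %c1
        iter_args(%acc = %init) -> (f32) {
      %elem = load %buf[%i] : memref<3xf32>
      %pred = cmpf "ogt", %elem, %acc : f32
      // scf.if with a result: each branch yields one f32.
      %next = scf.if %pred -> (f32) {
        scf.yield %elem : f32
      } else {
        scf.yield %acc : f32
      }
      scf.yield %next : f32
    }
    return %max : f32
  }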

View File

@@ -22,7 +22,7 @@ func @reduce(%arg: memref<100x10xf32>,
 // CHECK-DAG: %[[LB:.*]] = constant 0 : index
 // CHECK-DAG: %[[UB:.*]] = constant 10 : index
 // CHECK-DAG: %[[STEP:.*]] = constant 1 : index
-// CHECK: loop.for %[[IDX1:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
+// CHECK: scf.for %[[IDX1:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
 // CHECK: %[[LHS:.*]] = linalg.slice %[[ARG2]][%[[IDX]]] : memref<100xf32>, index, memref<f32, #map0>
 // CHECK: %[[RHS:.*]] = linalg.slice %[[ARG0]][%[[IDX]], %[[IDX1]]] : memref<100x10xf32>, index, index, memref<f32, #map0>
 // CHECK: "xla_lhlo.add"(%[[LHS]], %[[RHS]], %[[LHS]]) : (memref<f32, {{.*}}>, memref<f32, {{.*}}>, memref<f32, {{.*}}>) -> ()

View File

@@ -22,13 +22,13 @@ func @reduce(%arg: memref<100x10x5xf32>,
 // CHECK-DAG: [[C10:%.*]] = constant 10 : index
 // CHECK-DAG: [[C100:%.*]] = constant 100 : index
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]]
-// CHECK: loop.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C100]], [[C5]]) step ([[C1]], [[C1]]) {
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[J:%.*]]) =
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[J:%.*]]) =
 // CHECK-SAME: ([[C0]]) to ([[C10]]) step ([[C1]]) init ([[INIT]]) -> f32 {
 // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]]
 // CHECK-SAME: {{\[}}[[I]], [[J]], [[K]]] : memref<100x10x5xf32>
-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -37,12 +37,12 @@ func @reduce(%arg: memref<100x10x5xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]] : f32
+// CHECK: scf.reduce.return [[ACC_RESULT]] : f32
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[K]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
 // -----
@@ -66,10 +66,10 @@ func @reduce_no_outer_loop(%arg: memref<100xf32>,
 // CHECK-DAG: [[C1:%.*]] = constant 1 : index
 // CHECK-DAG: [[C100:%.*]] = constant 100 : index
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]]
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[I:%.*]]) = ([[C0]])
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[I:%.*]]) = ([[C0]])
 // CHECK-SAME: to ([[C100]]) step ([[C1]]) init ([[INIT]]) -> f32 {
 // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]]{{\[}}[[I]]{{\]}}
-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -78,9 +78,9 @@ func @reduce_no_outer_loop(%arg: memref<100xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]]
+// CHECK: scf.reduce.return [[ACC_RESULT]]
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[C0]]]
 // -----
@@ -107,13 +107,13 @@ func @dynamic_reduce(%arg: memref<?x?x?xf32>,
 // CHECK: [[DIM1:%.*]] = dim [[ARG_BUF]], 1 : memref<?x?x?xf32>
 // CHECK: [[DIM2:%.*]] = dim [[ARG_BUF]], 2 : memref<?x?x?xf32>
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]]
-// CHECK: loop.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[DIM0]], [[DIM2]]) step ([[C1]], [[C1]]) {
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[J:%.*]]) =
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[J:%.*]]) =
 // CHECK-SAME: ([[C0]]) to ([[DIM1]]) step ([[C1]]) init ([[INIT]]) -> f32 {
 // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]]
 // CHECK-SAME: {{\[}}[[I]], [[J]], [[K]]] : memref<?x?x?xf32>
-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -122,12 +122,12 @@ func @dynamic_reduce(%arg: memref<?x?x?xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]] : f32
+// CHECK: scf.reduce.return [[ACC_RESULT]] : f32
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[K]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
 // -----
@@ -158,9 +158,9 @@ func @reduce_window(%arg: memref<112x112xf32>,
 // CHECK-DAG: [[C56:%.*]] = constant 56 : index
 // CHECK-DAG: [[C112:%.*]] = constant 112 : index
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]][] : memref<f32>
-// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C56]], [[C56]]) step ([[C1]], [[C1]]) {
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel
 // CHECK-SAME: ([[IW:%.*]], [[JW:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C3]], [[C3]]) step ([[C1]], [[C1]])
 // CHECK-SAME: init ([[INIT]]) -> f32 {
@@ -177,15 +177,15 @@ func @reduce_window(%arg: memref<112x112xf32>,
 // CHECK: [[INDEX_J_FITS:%.*]] = cmpi "ult", [[INDEX_J]], [[C112]]
 // CHECK: [[IN_BOUNDS_1:%.*]] = and [[IN_BOUNDS_0]], [[INDEX_J_FITS]]
-// CHECK: [[ELEM_TO_REDUCE:%.*]] = loop.if [[IN_BOUNDS_1]] -> (f32) {
+// CHECK: [[ELEM_TO_REDUCE:%.*]] = scf.if [[IN_BOUNDS_1]] -> (f32) {
 // CHECK: [[OPERAND_ELEM:%.*]] =
 // CHECK-SAME: load [[OPERAND_BUF]]{{\[}}[[INDEX_I]], [[INDEX_J]]]
-// CHECK: loop.yield [[OPERAND_ELEM]] : f32
+// CHECK: scf.yield [[OPERAND_ELEM]] : f32
 // CHECK: } else {
-// CHECK: loop.yield [[INIT]] : f32
+// CHECK: scf.yield [[INIT]] : f32
 // CHECK: }
-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -194,12 +194,12 @@ func @reduce_window(%arg: memref<112x112xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.maximum"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]] : f32
+// CHECK: scf.reduce.return [[ACC_RESULT]] : f32
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[J]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: return
 // CHECK: }
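
In isolation, the scf.parallel / scf.reduce / scf.reduce.return nest that all of these tests match has the following shape, shown for a plain 1-D sum (hand-written sketch, assumed names):

  func @sum_1d(%buf: memref<100xf32>, %init: f32) -> f32 {
    %c0 = constant 0 : index
    %c1 = constant 1 : index
    %c100 = constant 100 : index
    %sum = scf.parallel (%i) = (%c0) to (%c100) step (%c1)
        init (%init) -> f32 {
      %elem = load %buf[%i] : memref<100xf32>
      // scf.reduce declares how two partial results combine.
      scf.reduce(%elem) : f32 {
      ^bb0(%lhs: f32, %rhs: f32):
        %0 = addf %lhs, %rhs : f32
        scf.reduce.return %0 : f32
      }
      scf.yield
    }
    return %sum : f32
  }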

View File

@@ -61,8 +61,8 @@ Value ApplySingleResultLhloCode(Location loc, ValueRange operands,
 // Converts a block with LHLO ops and with signature:
 //   ^bb(%lhs: memref<f32>, %rhs: memref<f32>, %res: memref<f32>):
-// into a reduction operator of loop.reduce by doing buffer allocation for
-// scalar arguments and the result of `loop.reduce` to make it compatible with
+// into a reduction operator of scf.reduce by doing buffer allocation for
+// scalar arguments and the result of `scf.reduce` to make it compatible with
 // LHLO ops.
 void ConvertToReductionOperator(Location loc, scf::ReduceOp reduce_op,
                                 Block* lhlo_block, OpBuilder* b) {
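
Concretely, for an xla_lhlo.add reduction body the rewritten operator comes out roughly as below; this sketch mirrors the CHECK lines in the tests above, with illustrative buffer names:

  scf.reduce(%elem_to_reduce) : f32 {
  ^bb0(%elem: f32, %acc: f32):
    // Box the scalar block arguments so the LHLO op can consume memrefs.
    %elem_buf = alloc() : memref<f32>
    %acc_buf = alloc() : memref<f32>
    %acc_out_buf = alloc() : memref<f32>
    store %elem, %elem_buf[] : memref<f32>
    store %acc, %acc_buf[] : memref<f32>
    "xla_lhlo.add"(%elem_buf, %acc_buf, %acc_out_buf)
        : (memref<f32>, memref<f32>, memref<f32>) -> ()
    // Unbox the result and return it as the reduced scalar.
    %acc_result = load %acc_out_buf[] : memref<f32>
    scf.reduce.return %acc_result : f32
  }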
@@ -170,10 +170,10 @@ scf::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value,
 // is roughly converted into:
 //
 //  %init = load %init_buf[] : memref<f32>
-//  loop.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
-//    %result = loop.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
+//  scf.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
+//    %result = scf.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
 //      %elem_to_reduce = load %buffer[%i, %j, %k] : memref<100x10x5xf32>
-//      loop.reduce(%elem_to_reduce) {
+//      scf.reduce(%elem_to_reduce) {
 //        ^bb0(%elem: f32, %acc: f32):   // no predecessors
 //          elem_buf = alloc() : memref<f32>
 //          store %elem, elem_buf[] : memref<f32>
@@ -181,11 +181,11 @@ scf::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value,
 //          store %acc, acc_buf[] : memref<f32>
 //          <LHLO_ops>
 //          %acc_result = load acc_buf[] : memref<f32>
-//          loop.reduce.return %acc_result : f32
+//          scf.reduce.return %acc_result : f32
 //      } : f32
-//      loop.yield
+//      scf.yield
 //    } : f32
-//    loop.yield
+//    scf.yield
 //  }
 class ReduceOpConverter : public OpConversionPattern<xla_lhlo::ReduceOp> {
  public:
@@ -206,24 +206,24 @@ class ReduceOpConverter : public OpConversionPattern<xla_lhlo::ReduceOp> {
   }
  private:
-  // Creates nested `loop.parallel` ops with `loop.reduce`. The outer ParallelOp
+  // Creates nested `scf.parallel` ops with `scf.reduce`. The outer ParallelOp
   // refers to the parallel dimensions of `xla_reduce_op` if any and the inner
-  // ParallelOp refers to the reduction dimensions. The loop.reduce op is
+  // ParallelOp refers to the reduction dimensions. The scf.reduce op is
   // returned.
   //
   // If the reduction argument is a memref<100x10x5xf32> and the
   // reduction is performed along dimension 1 then this method will generate
   //
   //  %init = load %init_buf[] : memref<f32>
-  //  loop.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
-  //    %result = loop.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
+  //  scf.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
+  //    %result = scf.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
   //      %elem_to_reduce = load %buffer[%i, %j, %k] : memref<100x10x5xf32>
-  //      loop.reduce(%elem_to_reduce) {
+  //      scf.reduce(%elem_to_reduce) {
   //        <THE BLOCK PTR TO BE RETURNED>
   //      } : f32
-  //      loop.yield
+  //      scf.yield
   //    } : f32
-  //    loop.yield
+  //    scf.yield
   //  }
   scf::ReduceOp CreateReduceOpInNestedParallelLoops(
       xla_lhlo::ReduceOp xla_reduce_op,
@@ -341,20 +341,20 @@ class ReduceOpConverter : public OpConversionPattern<xla_lhlo::ReduceOp> {
 // is roughly converted into:
 //
 //  %neutral_elem = load %init_buf[] : memref<f32>
-//  loop.parallel (%i, %j) = (%c0, %c0) to (%c56, %c56) step (%c1, %c1) {
-//    %result = loop.parallel (%iw, %jw) = (%c0, %c0)
+//  scf.parallel (%i, %j) = (%c0, %c0) to (%c56, %c56) step (%c1, %c1) {
+//    %result = scf.parallel (%iw, %jw) = (%c0, %c0)
 //        to (%c3, %c3) step (%c1, %c1) neutral_elem (%0) -> f32 {
 //      %in_bounds = <COMPUTE IF INDEX IS IN OPERAND'S pad>
 //      %elem = load %operand[%computed_i, %computed_j]
 //      %elem_or_neutral = select %in_bounds, %elem, %neutral_elem : f32
-//      loop.reduce(%elem_to_reduce) : f32 {
+//      scf.reduce(%elem_to_reduce) : f32 {
 //        ^bb0(%arg7: f32, %arg8: f32):
 //          <LHLO ops>
 //      }
-//      loop.yield
+//      scf.yield
 //    }
 //    store %result, %output_buffer[%i, %j] : memref<56x56xf32>
-//    loop.yield
+//    scf.yield
 //  }
 //  return
 // }
@@ -457,16 +457,16 @@ class ReduceWindowOpConverter
 // https://www.tensorflow.org/xla/operation_semantics#selectandscatter
 //
 // Pseudocode:
-//  loop.parallel(coordinates O in the output):
+//  scf.parallel(coordinates O in the output):
 //    output[O] = init
-//  loop.parallel(coordinates S in the source):
+//  scf.parallel(coordinates S in the source):
 //    selected_ivs = 0
 //    selected_val = 0
 //    initialized_flag = false
-//    loop.for (first dim W_1 in the window)
+//    scf.for (first dim W_1 in the window)
 //        iter_args (selected_ivs, selected_val, initialized_flag):
 //      ...
-//      loop.for (last dim W_N in the window):
+//      scf.for (last dim W_N in the window):
 //          iter_args (selected_ivs, selected_val, initialized_flag):
 //        I = S * stride + W - pad_low
 //        if I within bounds of operand:

View File

@@ -81,8 +81,8 @@ std::unique_ptr<OperationPass<FuncOp>> createLegalizeToGpuPass();
 // Fuses linalg ops obtained after LHLO lowering. To enable fusion,
 // operations are first tiled.
 //
-// When 'use_parallel_loops' is set, the tiling will use loop.parallel
-// operations. Otherwise, loop.for operations are used.
+// When 'use_parallel_loops' is set, the tiling will use scf.parallel
+// operations. Otherwise, scf.for operations are used.
 //
 // 'tile_sizes' provides the tile sizes to use for tiling. If the linalg
 // operation has more dimensions than tile sizes provided, 1 is used as
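
For example, with tile_sizes = [2, 3] on a two-dimensional linalg operation over a 6x6 iteration space (an assumed setup that matches the TILED checks in the fusion test above), the generated loops step by the tile sizes:

  // 'use_parallel_loops' unset: nested scf.for ops.
  scf.for %i = %c0 to %c6 step %c2 {
    scf.for %j = %c0 to %c6 step %c3 {
      // tiled linalg ops on the (%i, %j) tile
    }
  }

  // 'use_parallel_loops' set: one multi-dimensional scf.parallel.
  scf.parallel (%i, %j) = (%c0, %c0) to (%c6, %c6) step (%c2, %c3) {
    // tiled linalg ops on the (%i, %j) tile
  }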