Integrate LLVM at https://github.com/llvm/llvm-project/commit/bfa200ebcf37

PiperOrigin-RevId: 311490759
Change-Id: Icd37195b07135947a26f185a8d2a1ddc1adf718c

commit b187ba0bcc (parent ca18db7f3f)
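The substantive change picked up by this integrate is the rename of the MLIR `loop` dialect to `scf` (structured control flow): loop.for, loop.parallel, loop.if, loop.reduce, loop.yield and loop.reduce.return become scf.for, scf.parallel, scf.if, scf.reduce, scf.yield and scf.reduce.return, with unchanged semantics. The hunks below update the FileCheck tests and code comments accordingly. A minimal before/after sketch (illustrative only, not taken from this diff):

    // Before the integrate (loop dialect):
    loop.parallel (%i) = (%c0) to (%c100) step (%c1) {
      loop.yield
    }

    // After the integrate (scf dialect): same op, new spelling.
    scf.parallel (%i) = (%c0) to (%c100) step (%c1) {
      scf.yield
    }
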
@@ -24,9 +24,9 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>,
 // CHECK-LABEL: func @fusion
 // CHECK: %[[C1:.*]] = constant 1
 // CHECK-NOT: linalg.generic
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK-NOT: loop.for
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK-NOT: scf.for
 // CHECK: linalg.generic
 // CHECK: addf
 // CHECK: linalg.generic
@@ -36,9 +36,9 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>,
 // TILED-DAG: %[[C2:.*]] = constant 2
 // TILED-DAG: %[[C3:.*]] = constant 3
 // TILED-NOT: linalg.generic
-// TILED: loop.for {{.*}} step %[[C2]]
-// TILED: loop.for {{.*}} step %[[C3]]
-// TILED-NOT: loop.for
+// TILED: scf.for {{.*}} step %[[C2]]
+// TILED: scf.for {{.*}} step %[[C3]]
+// TILED-NOT: scf.for
 // TILED: linalg.generic
 // TILED: addf
 // TILED: linalg.generic
@@ -46,8 +46,8 @@ func @fusion(%multiplier: memref<6x6xf32>, %summand_1: memref<6x6xf32>,

 // PLOOP-LABEL: func @fusion
 // PLOOP-NOT: linalg.generic
-// PLOOP: loop.parallel
-// PLOOP-NOT: loop.parallel
+// PLOOP: scf.parallel
+// PLOOP-NOT: scf.parallel
 // PLOOP: linalg.generic
 // PLOOP: addf
 // PLOOP: linalg.generic
@@ -94,9 +94,9 @@ func @fusion_of_three(%arg0: memref<100x10xf32>,
 // CHECK-LABEL: func @fusion
 // CHECK: %[[C1:.*]] = constant 1
 // CHECK-NOT: linalg.generic
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK-NOT: loop.for
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK-NOT: scf.for
 // CHECK: linalg.generic
 // CHECK: linalg.generic
 // CHECK: subf
@@ -107,9 +107,9 @@ func @fusion_of_three(%arg0: memref<100x10xf32>,
 // TILED-DAG: %[[C2:.*]] = constant 2
 // TILED-DAG: %[[C3:.*]] = constant 3
 // TILED-NOT: linalg.generic
-// TILED: loop.for {{.*}} step %[[C2]]
-// TILED: loop.for {{.*}} step %[[C3]]
-// TILED-NOT: loop.for
+// TILED: scf.for {{.*}} step %[[C2]]
+// TILED: scf.for {{.*}} step %[[C3]]
+// TILED-NOT: scf.for
 // TILED: linalg.generic
 // TILED: linalg.generic
 // TILED: subf
@@ -118,8 +118,8 @@ func @fusion_of_three(%arg0: memref<100x10xf32>,

 // PLOOP-LABEL: func @fusion_of_three
 // PLOOP-NOT: linalg.generic
-// PLOOP: loop.parallel
-// PLOOP-NOT: loop.parallel
+// PLOOP: scf.parallel
+// PLOOP-NOT: scf.parallel
 // PLOOP: linalg.generic
 // PLOOP: linalg.generic
 // PLOOP: subf
@@ -147,11 +147,11 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32
 // CHECK-LABEL: func @fusion_4d
 // CHECK: %[[C1:.*]] = constant 1
 // CHECK-NOT: linalg.generic
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK: loop.for {{.*}} step %[[C1]]
-// CHECK-NOT: loop.for
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK: scf.for {{.*}} step %[[C1]]
+// CHECK-NOT: scf.for
 // CHECK: linalg.generic
 // CHECK: addf
 // CHECK: linalg.generic
@@ -161,9 +161,9 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32
 // TILED-DAG: %[[C2:.*]] = constant 2
 // TILED-DAG: %[[C3:.*]] = constant 3
 // TILED-NOT: linalg.generic
-// TILED: loop.for {{.*}} step %[[C2]]
-// TILED: loop.for {{.*}} step %[[C3]]
-// TILED-NOT: loop.for
+// TILED: scf.for {{.*}} step %[[C2]]
+// TILED: scf.for {{.*}} step %[[C3]]
+// TILED-NOT: scf.for
 // TILED: linalg.generic
 // TILED: addf
 // TILED: linalg.generic
@@ -171,8 +171,8 @@ func @fusion_4d(%multiplier: memref<6x6x6x6xf32>, %summand_1: memref<6x6x6x6xf32

 // PLOOP-LABEL: func @fusion_4d
 // PLOOP-NOT: linalg.generic
-// PLOOP: loop.parallel
-// PLOOP-NOT: loop.parallel
+// PLOOP: scf.parallel
+// PLOOP-NOT: scf.parallel
 // PLOOP: linalg.generic
 // PLOOP: addf
 // PLOOP: linalg.generic

@@ -50,19 +50,19 @@ func @select_and_scatter(%arg: memref<112x112xf32>,

 // Parallel loop to initialize the output buffer.
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]][] : memref<f32>
-// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C112]], [[C112]]) step ([[C1]], [[C1]]) {
 // CHECK: store [[INIT]], [[RESULT_BUF]]{{\[}}[[I]], [[J]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }

 // Parallel loop over source buffer to compute scattered values.
-// CHECK: loop.parallel ([[II:%.*]], [[JJ:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[II:%.*]], [[JJ:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C56]], [[C56]]) step ([[C1]], [[C1]]) {

 // Window loop w.r.t. first dim.
 // CHECK: [[SEL_RES_I:%.*]]:4
-// CHECK-SAME: = loop.for [[WIN_I:%.*]] = [[C0]] to [[C3]] step [[C1]]
+// CHECK-SAME: = scf.for [[WIN_I:%.*]] = [[C0]] to [[C3]] step [[C1]]
 // CHECK-SAME: iter_args(
 // CHECK-SAME: [[SEL_I_0:%.*]] = [[C0]], [[SEL_J_0:%.*]] = [[C0]],
 // CHECK-SAME: [[SEL_VAL_0:%.*]] = [[C0_F32]],
@@ -71,7 +71,7 @@ func @select_and_scatter(%arg: memref<112x112xf32>,

 // Window loop w.r.t. second dim.
 // CHECK: [[SEL_RES_J:%.*]]:4
-// CHECK-SAME: = loop.for [[WIN_J:%.*]] = [[C0]] to [[C3]] step [[C1]]
+// CHECK-SAME: = scf.for [[WIN_J:%.*]] = [[C0]] to [[C3]] step [[C1]]
 // CHECK-SAME: iter_args(
 // CHECK-SAME: [[SEL_I:%.*]] = [[SEL_I_0]], [[SEL_J:%.*]] = [[SEL_J_0]],
 // CHECK-SAME: [[SEL_VAL:%.*]] = [[SEL_VAL_0]],
@@ -102,14 +102,14 @@ func @select_and_scatter(%arg: memref<112x112xf32>,
 // be applied, current selected ivs (SEL_I, SEL_J) and value (SEL_VAL) are
 // returned in that case.
 // CHECK: [[IF_INBOUNDS_RES:%.*]]:4
-// CHECK-SAME: = loop.if [[INBOUNDS_1]] -> (index, index, f32, i1) {
+// CHECK-SAME: = scf.if [[INBOUNDS_1]] -> (index, index, f32, i1) {


 // INBOUNDS-THEN-BODY, i.e. if INBOUNDS == true

 // CHECK: [[ARG_ELEM:%.*]] = load [[ARG_BUF]]{{\[}}[[ARG_I]], [[ARG_J]]]
 // CHECK: [[IF_INIT_RES:%.*]]:4
-// CHECK-SAME: = loop.if [[SEL_INIT]] -> (index, index, f32, i1) {
+// CHECK-SAME: = scf.if [[SEL_INIT]] -> (index, index, f32, i1) {

 // INIT-THEN-BODY, i.e. INBOUNDS == true and INIT = true

@@ -133,40 +133,40 @@ func @select_and_scatter(%arg: memref<112x112xf32>,


 // Depending on PRED, return ARG ivs & elem or current select ivs and value.
-// CHECK: [[IF_PRED_RES:%.*]]:4 = loop.if [[PRED]]
-// CHECK: loop.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]], [[CTRUE]]
+// CHECK: [[IF_PRED_RES:%.*]]:4 = scf.if [[PRED]]
+// CHECK: scf.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]], [[CTRUE]]
 // CHECK: } else {
-// CHECK: loop.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]], [[SEL_INIT]]
+// CHECK: scf.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]], [[SEL_INIT]]
 // CHECK: }

 // INIT-THEN-BODY yield.
-// CHECK: loop.yield [[IF_PRED_RES]]#0, [[IF_PRED_RES]]#1,
+// CHECK: scf.yield [[IF_PRED_RES]]#0, [[IF_PRED_RES]]#1,
 // CHECK-SAME: [[IF_PRED_RES]]#2, [[IF_PRED_RES]]#3

 // INIT-ELSE-BODY, i.e. if INBOUNDS == TRUE and INIT == FALSE, returns ARG
 // ivs and element without computing Select function.
-// CHECK: loop.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]],
+// CHECK: scf.yield [[ARG_I]], [[ARG_J]], [[ARG_ELEM]],
 // CHECK-SAME: [[CTRUE]] : index, index, f32, i1
 // CHECK: }

 // INBOUNDS-THEN-BODY yield.
-// CHECK: loop.yield [[IF_INIT_RES]]#0, [[IF_INIT_RES]]#1, [[IF_INIT_RES]]#2,
+// CHECK: scf.yield [[IF_INIT_RES]]#0, [[IF_INIT_RES]]#1, [[IF_INIT_RES]]#2,
 // CHECK-SAME: [[IF_INIT_RES]]#3 : index, index, f32, i1
 // CHECK: }

 // INBOUNDS-ELSE-REGION, i.e. if INBOUNDS == FALSE
 // We are in the pad area, return current iter_args.
-// CHECK: loop.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]],
+// CHECK: scf.yield [[SEL_I]], [[SEL_J]], [[SEL_VAL]],
 // CHECK-SAME: [[SEL_INIT]] : index, index, f32, i1
 // CHECK: }

 // Window loop w.r.t. second dim yield.
-// CHECK: loop.yield [[IF_INBOUNDS_RES]]#0, [[IF_INBOUNDS_RES]]#1,
+// CHECK: scf.yield [[IF_INBOUNDS_RES]]#0, [[IF_INBOUNDS_RES]]#1,
 // CHECK-SAME: [[IF_INBOUNDS_RES]]#2, [[IF_INBOUNDS_RES]]#3
 // CHECK: }

 // Window loop w.r.t. first dim yield.
-// CHECK: loop.yield [[SEL_RES_J]]#0, [[SEL_RES_J]]#1, [[SEL_RES_J]]#2,
+// CHECK: scf.yield [[SEL_RES_J]]#0, [[SEL_RES_J]]#1, [[SEL_RES_J]]#2,
 // CHECK-SAME: [[SEL_RES_J]]#3 : index, index, f32, i1
 // CHECK: }

@@ -196,4 +196,4 @@ func @select_and_scatter(%arg: memref<112x112xf32>,
 // CHECK: atomic_yield [[RES]] : f32

 // Parallel loop over source buffer yield
-// CHECK: loop.yield
+// CHECK: scf.yield

@@ -22,7 +22,7 @@ func @reduce(%arg: memref<100x10xf32>,
 // CHECK-DAG: %[[LB:.*]] = constant 0 : index
 // CHECK-DAG: %[[UB:.*]] = constant 10 : index
 // CHECK-DAG: %[[STEP:.*]] = constant 1 : index
-// CHECK: loop.for %[[IDX1:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
+// CHECK: scf.for %[[IDX1:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] {
 // CHECK: %[[LHS:.*]] = linalg.slice %[[ARG2]][%[[IDX]]] : memref<100xf32>, index, memref<f32, #map0>
 // CHECK: %[[RHS:.*]] = linalg.slice %[[ARG0]][%[[IDX]], %[[IDX1]]] : memref<100x10xf32>, index, index, memref<f32, #map0>
 // CHECK: "xla_lhlo.add"(%[[LHS]], %[[RHS]], %[[LHS]]) : (memref<f32, {{.*}}>, memref<f32, {{.*}}>, memref<f32, {{.*}}>) -> ()

@@ -22,13 +22,13 @@ func @reduce(%arg: memref<100x10x5xf32>,
 // CHECK-DAG: [[C10:%.*]] = constant 10 : index
 // CHECK-DAG: [[C100:%.*]] = constant 100 : index
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]]
-// CHECK: loop.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C100]], [[C5]]) step ([[C1]], [[C1]]) {
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[J:%.*]]) =
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[J:%.*]]) =
 // CHECK-SAME: ([[C0]]) to ([[C10]]) step ([[C1]]) init ([[INIT]]) -> f32 {
 // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]]
 // CHECK-SAME: {{\[}}[[I]], [[J]], [[K]]] : memref<100x10x5xf32>
-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -37,12 +37,12 @@ func @reduce(%arg: memref<100x10x5xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]] : f32
+// CHECK: scf.reduce.return [[ACC_RESULT]] : f32
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[K]]]
-// CHECK: loop.yield
+// CHECK: scf.yield

 // -----

@@ -66,10 +66,10 @@ func @reduce_no_outer_loop(%arg: memref<100xf32>,
 // CHECK-DAG: [[C1:%.*]] = constant 1 : index
 // CHECK-DAG: [[C100:%.*]] = constant 100 : index
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]]
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[I:%.*]]) = ([[C0]])
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[I:%.*]]) = ([[C0]])
 // CHECK-SAME: to ([[C100]]) step ([[C1]]) init ([[INIT]]) -> f32 {
 // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]]{{\[}}[[I]]{{\]}}
-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -78,9 +78,9 @@ func @reduce_no_outer_loop(%arg: memref<100xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]]
+// CHECK: scf.reduce.return [[ACC_RESULT]]
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[C0]]]

 // -----
@@ -107,13 +107,13 @@ func @dynamic_reduce(%arg: memref<?x?x?xf32>,
 // CHECK: [[DIM1:%.*]] = dim [[ARG_BUF]], 1 : memref<?x?x?xf32>
 // CHECK: [[DIM2:%.*]] = dim [[ARG_BUF]], 2 : memref<?x?x?xf32>
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]]
-// CHECK: loop.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[K:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[DIM0]], [[DIM2]]) step ([[C1]], [[C1]]) {
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel ([[J:%.*]]) =
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel ([[J:%.*]]) =
 // CHECK-SAME: ([[C0]]) to ([[DIM1]]) step ([[C1]]) init ([[INIT]]) -> f32 {
 // CHECK: [[ELEM_TO_REDUCE:%.*]] = load [[ARG_BUF]]
 // CHECK-SAME: {{\[}}[[I]], [[J]], [[K]]] : memref<?x?x?xf32>
-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -122,12 +122,12 @@ func @dynamic_reduce(%arg: memref<?x?x?xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.add"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]] : f32
+// CHECK: scf.reduce.return [[ACC_RESULT]] : f32
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[K]]]
-// CHECK: loop.yield
+// CHECK: scf.yield

 // -----

@@ -158,9 +158,9 @@ func @reduce_window(%arg: memref<112x112xf32>,
 // CHECK-DAG: [[C56:%.*]] = constant 56 : index
 // CHECK-DAG: [[C112:%.*]] = constant 112 : index
 // CHECK: [[INIT:%.*]] = load [[INIT_BUF]][] : memref<f32>
-// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
+// CHECK: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C56]], [[C56]]) step ([[C1]], [[C1]]) {
-// CHECK: [[REDUCTION_RESULT:%.*]] = loop.parallel
+// CHECK: [[REDUCTION_RESULT:%.*]] = scf.parallel
 // CHECK-SAME: ([[IW:%.*]], [[JW:%.*]]) = ([[C0]], [[C0]])
 // CHECK-SAME: to ([[C3]], [[C3]]) step ([[C1]], [[C1]])
 // CHECK-SAME: init ([[INIT]]) -> f32 {
@@ -177,15 +177,15 @@ func @reduce_window(%arg: memref<112x112xf32>,
 // CHECK: [[INDEX_J_FITS:%.*]] = cmpi "ult", [[INDEX_J]], [[C112]]
 // CHECK: [[IN_BOUNDS_1:%.*]] = and [[IN_BOUNDS_0]], [[INDEX_J_FITS]]

-// CHECK: [[ELEM_TO_REDUCE:%.*]] = loop.if [[IN_BOUNDS_1]] -> (f32) {
+// CHECK: [[ELEM_TO_REDUCE:%.*]] = scf.if [[IN_BOUNDS_1]] -> (f32) {
 // CHECK: [[OPERAND_ELEM:%.*]] =
 // CHECK-SAME: load [[OPERAND_BUF]]{{\[}}[[INDEX_I]], [[INDEX_J]]]
-// CHECK: loop.yield [[OPERAND_ELEM]] : f32
+// CHECK: scf.yield [[OPERAND_ELEM]] : f32
 // CHECK: } else {
-// CHECK: loop.yield [[INIT]] : f32
+// CHECK: scf.yield [[INIT]] : f32
 // CHECK: }

-// CHECK: loop.reduce([[ELEM_TO_REDUCE]]) : f32 {
+// CHECK: scf.reduce([[ELEM_TO_REDUCE]]) : f32 {
 // CHECK: ^bb0([[ELEM:%.*]]: f32, [[ACC:%.*]]: f32):
 // CHECK: [[ELEM_BUF:%.*]] = alloc() : memref<f32>
 // CHECK: [[ACC_BUF:%.*]] = alloc() : memref<f32>
@@ -194,12 +194,12 @@ func @reduce_window(%arg: memref<112x112xf32>,
 // CHECK: store [[ACC]], [[ACC_BUF]][] : memref<f32>
 // CHECK: "xla_lhlo.maximum"([[ELEM_BUF]], [[ACC_BUF]], [[ACC_OUT_BUF]])
 // CHECK: [[ACC_RESULT:%.*]] = load [[ACC_OUT_BUF]][] : memref<f32>
-// CHECK: loop.reduce.return [[ACC_RESULT]] : f32
+// CHECK: scf.reduce.return [[ACC_RESULT]] : f32
 // CHECK: }
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: store [[REDUCTION_RESULT]], [[RESULT_BUF]]{{\[}}[[I]], [[J]]]
-// CHECK: loop.yield
+// CHECK: scf.yield
 // CHECK: }
 // CHECK: return
 // CHECK: }

@@ -61,8 +61,8 @@ Value ApplySingleResultLhloCode(Location loc, ValueRange operands,

 // Converts a block with LHLO ops and with signature:
 //   ^bb(%lhs: memref<f32>, %rhs: memref<f32>, %res: memref<f32>):
-// into a reduction operator of loop.reduce by doing buffer allocation for
-// scalar arguments and the result of `loop.reduce` to make it compatible with
+// into a reduction operator of scf.reduce by doing buffer allocation for
+// scalar arguments and the result of `scf.reduce` to make it compatible with
 // LHLO ops.
 void ConvertToReductionOperator(Location loc, scf::ReduceOp reduce_op,
                                 Block* lhlo_block, OpBuilder* b) {
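For reference, the region that ConvertToReductionOperator builds inside scf.reduce has roughly the following shape (a sketch assembled from the CHECK patterns in the test hunks above, not literal pass output):

    scf.reduce(%elem_to_reduce) : f32 {
    ^bb0(%elem: f32, %acc: f32):
      %elem_buf = alloc() : memref<f32>
      %acc_buf = alloc() : memref<f32>
      %acc_out_buf = alloc() : memref<f32>
      store %elem, %elem_buf[] : memref<f32>
      store %acc, %acc_buf[] : memref<f32>
      "xla_lhlo.add"(%elem_buf, %acc_buf, %acc_out_buf)
          : (memref<f32>, memref<f32>, memref<f32>) -> ()
      %acc_result = load %acc_out_buf[] : memref<f32>
      scf.reduce.return %acc_result : f32
    }
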
@@ -170,10 +170,10 @@ scf::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value,
 // is roughly converted into:
 //
 //  %init = load %init_buf[] : memref<f32>
-//  loop.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
-//    %result = loop.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
+//  scf.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
+//    %result = scf.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
 //      %elem_to_reduce = load %buffer[%i, %j, %k] : memref<100x10x5xf32>
-//      loop.reduce(%elem_to_reduce) {
+//      scf.reduce(%elem_to_reduce) {
 //        ^bb0(%elem: f32, %acc: f32): // no predecessors
 //          elem_buf = alloc() : memref<f32>
 //          store %elem, elem_buf[] : memref<f32>
@@ -181,11 +181,11 @@ scf::ParallelOp MakeLoopOverShape(Location loc, Value shaped_value,
 //          store %acc, acc_buf[] : memref<f32>
 //          <LHLO_ops>
 //          %acc_result = load acc_buf[] : memref<f32>
-//          loop.reduce.return %acc_result : f32
+//          scf.reduce.return %acc_result : f32
 //      } : f32
-//      loop.yield
+//      scf.yield
 //    } : f32
-//    loop.yield
+//    scf.yield
 //  }
 class ReduceOpConverter : public OpConversionPattern<xla_lhlo::ReduceOp> {
  public:
@@ -206,24 +206,24 @@ class ReduceOpConverter : public OpConversionPattern<xla_lhlo::ReduceOp> {
 }

 private:
-// Creates nested `loop.parallel` ops with `loop.reduce`. The outer ParallelOp
+// Creates nested `scf.parallel` ops with `scf.reduce`. The outer ParallelOp
 // refers to the parallel dimensions of `xla_reduce_op` if any and the inner
-// ParallelOp refers to the reduction dimensions. The loop.reduce op is
+// ParallelOp refers to the reduction dimensions. The scf.reduce op is
 // returned.
 //
 // If the reduction argument is a memref<100x10x5xf32> and the
 // reduction is performed along dimension 1 then this method will generate
 //
 //  %init = load %init_buf[] : memref<f32>
-//  loop.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
-//    %result = loop.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
+//  scf.parallel (%i, %k) = (%c0, %c0) to (%c100, %c5) step (%c1, %c1) {
+//    %result = scf.parallel (%j) = (%c0) to (%c10) step (%c1) init (%init) {
 //      %elem_to_reduce = load %buffer[%i, %j, %k] : memref<100x10x5xf32>
-//      loop.reduce(%elem_to_reduce) {
+//      scf.reduce(%elem_to_reduce) {
 //        <THE BLOCK PTR TO BE RETURNED>
 //      } : f32
-//      loop.yield
+//      scf.yield
 //    } : f32
-//    loop.yield
+//    scf.yield
 //  }
 scf::ReduceOp CreateReduceOpInNestedParallelLoops(
     xla_lhlo::ReduceOp xla_reduce_op,
@@ -341,20 +341,20 @@ class ReduceOpConverter : public OpConversionPattern<xla_lhlo::ReduceOp> {
 // is roughly converted into:
 //
 //  %neutral_elem = load %init_buf[] : memref<f32>
-//  loop.parallel (%i, %j) = (%c0, %c0) to (%c56, %c56) step (%c1, %c1) {
-//    %result = loop.parallel (%iw, %jw) = (%c0, %c0)
+//  scf.parallel (%i, %j) = (%c0, %c0) to (%c56, %c56) step (%c1, %c1) {
+//    %result = scf.parallel (%iw, %jw) = (%c0, %c0)
 //        to (%c3, %c3) step (%c1, %c1) neutral_elem (%0) -> f32 {
 //      %in_bounds = <COMPUTE IF INDEX IS IN OPERAND'S pad>
 //      %elem = load %operand[%computed_i, %computed_j]
 //      %elem_or_neutral = select %in_bounds, %elem, %neutral_elem : f32
-//      loop.reduce(%elem_to_reduce) : f32 {
+//      scf.reduce(%elem_to_reduce) : f32 {
 //        ^bb0(%arg7: f32, %arg8: f32):
 //          <LHLO ops>
 //      }
-//      loop.yield
+//      scf.yield
 //    }
 //    store %result, %output_buffer[%i, %j] : memref<56x56xf32>
-//    loop.yield
+//    scf.yield
 //  }
 //  return
 // }
@@ -457,16 +457,16 @@ class ReduceWindowOpConverter
 // https://www.tensorflow.org/xla/operation_semantics#selectandscatter
 //
 // Pseudocode:
-//  loop.parallel(coordinates O in the output):
+//  scf.parallel(coordinates O in the output):
 //    output[O] = init
-//  loop.parallel(coordinates S in the source):
+//  scf.parallel(coordinates S in the source):
 //    selected_ivs = 0
 //    selected_val = 0
 //    initialized_flag = false
-//    loop.for (first dim W_1 in the window)
+//    scf.for (first dim W_1 in the window)
 //        iter_args (selected_ivs, selected_val, initialized_flag):
 //      ...
-//      loop.for (last dim W_N in the window):
+//      scf.for (last dim W_N in the window):
 //          iter_args (selected_ivs, selected_val, initialized_flag):
 //        I = S * stride + W - pad_low
 //        if I within bounds of operand:

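The selection state (selected_ivs, selected_val, initialized_flag) is threaded through the window loops as scf.for iter_args, which is what the [[SEL_RES_I]] and [[SEL_RES_J]] CHECK patterns earlier in this diff verify. Schematically, one window loop looks like the following sketch (illustrative only; the %c* and %false constants stand for values defined elsewhere):

    %sel_res:4 = scf.for %w = %c0 to %c3 step %c1
        iter_args(%sel_i = %c0, %sel_j = %c0, %sel_val = %c0_f32, %init_flag = %false)
        -> (index, index, f32, i1) {
      // ... compare the current candidate against the carried selection ...
      scf.yield %new_i, %new_j, %new_val, %new_flag : index, index, f32, i1
    }
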
@@ -81,8 +81,8 @@ std::unique_ptr<OperationPass<FuncOp>> createLegalizeToGpuPass();
 // Fuses linalg ops obtained after LHLO lowering. To enable fusion,
 // operations are first tiled.
 //
-// When 'use_parallel_loops' is set, the tiling will use loop.parallel
-// operations. Otherwise, loop.for operations are used.
+// When 'use_parallel_loops' is set, the tiling will use scf.parallel
+// operations. Otherwise, scf.for operations are used.
 //
 // 'tile_sizes' provides the tile sizes to use for tiling. If the linalg
 // operation has more dimensions than tile sizes provided, 1 is used as
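In the fusion tests above, the TILED prefix exercises the scf.for path with tile sizes 2 and 3, and the PLOOP prefix exercises the scf.parallel path. The effect of the flag on the generated loop nest, sketched for a 6x6 input (illustrative only; the exact IR depends on the tile sizes passed to the pass):

    // use_parallel_loops = false: tiling produces sequential scf.for loops.
    scf.for %i = %c0 to %c6 step %c2 {
      scf.for %j = %c0 to %c6 step %c3 {
        // fused linalg.generic ops operate on the current tile here
      }
    }

    // use_parallel_loops = true: tiling produces a single scf.parallel nest.
    scf.parallel (%i, %j) = (%c0, %c0) to (%c6, %c6) step (%c2, %c3) {
      // fused linalg.generic ops operate on the current tile here
      scf.yield
    }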