[MLIR] Switch to LMHLO-based emitter for loop fusion.

PiperOrigin-RevId: 338551650
Change-Id: Iec9f30ca504eacf9808dbf7c65554d561b754456
This commit is contained in:
Tim Shen 2020-10-22 14:39:19 -07:00 committed by TensorFlower Gardener
parent 73ec60de15
commit c17952e2c5
2 changed files with 136 additions and 119 deletions

View File

@ -1144,7 +1144,22 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) {
return Status::OK();
}
return IrEmitter::HandleFusion(fusion);
int unroll_factor = 1;
if (!MayPreventVectorization(*fusion)) {
unroll_factor = ComputeMaxUnrollFactor(fusion);
}
MlirEmitterInput input;
TF_ASSIGN_OR_RETURN(input.op, lhlo_scratch_emitter_.EmitFusionOp(fusion));
const auto& buffer_assignment = ir_emitter_context_->buffer_assignment();
auto& slice = input.extra_slice;
TF_ASSIGN_OR_RETURN(slice.buffer_slice,
buffer_assignment.GetUniqueSlice(fusion, {}));
slice.written = true;
slice.shape = fusion->shape();
input.thunk_info = GetThunkInfo(fusion);
return EmitLoopFusionFromMlir(input, fusion->shape(), unroll_factor);
}
Status IrEmitterUnnested::HandleCopy(HloInstruction* copy) {

View File

@ -4,7 +4,7 @@ HloModule TestModule
// CHECK-LABEL: entry:
// CHECK: %[[VAL_0:.*]] = getelementptr inbounds i8, i8* %[[VAL_1:.*]], i64 0
// CHECK: %[[VAL_2:.*]] = bitcast i8* %[[VAL_0]] to [128 x [112 x [112 x [64 x half]]]]*
// CHECK: %[[VAL_2:.*]] = bitcast i8* %[[VAL_0]] to [64 x float]*
// CHECK: %[[VAL_3:.*]] = getelementptr inbounds i8, i8* %[[VAL_4:.*]], i64 0
// CHECK: %[[VAL_5:.*]] = bitcast i8* %[[VAL_3]] to [64 x float]*
// CHECK: %[[VAL_6:.*]] = getelementptr inbounds i8, i8* %[[VAL_7:.*]], i64 0
@ -16,283 +16,285 @@ HloModule TestModule
// CHECK: %[[VAL_15:.*]] = getelementptr inbounds i8, i8* %[[VAL_16:.*]], i64 0
// CHECK: %[[VAL_17:.*]] = bitcast i8* %[[VAL_15]] to [64 x float]*
// CHECK: %[[VAL_18:.*]] = getelementptr inbounds i8, i8* %[[VAL_19:.*]], i64 0
// CHECK: %[[VAL_20:.*]] = bitcast i8* %[[VAL_18]] to [64 x float]*
// CHECK: %[[VAL_20:.*]] = bitcast i8* %[[VAL_18]] to [128 x [112 x [112 x [64 x half]]]]*
// CHECK: %[[VAL_21:.*]] = getelementptr inbounds i8, i8* %[[VAL_22:.*]], i64 0
// CHECK: %[[VAL_23:.*]] = bitcast i8* %[[VAL_21]] to [128 x [112 x [112 x [64 x half]]]]*
// CHECK: %[[VAL_24:.*]] = getelementptr inbounds i8, i8* %[[VAL_25:.*]], i64 0
// CHECK: %[[VAL_26:.*]] = bitcast i8* %[[VAL_24]] to [128 x [112 x [112 x [64 x half]]]]*
// CHECK: %[[VAL_27:.*]] = getelementptr inbounds i8, i8* %[[VAL_28:.*]], i64 0
// CHECK: %[[VAL_29:.*]] = bitcast i8* %[[VAL_27]] to [128 x [112 x [112 x [64 x half]]]]*
// CHECK: %[[VAL_30:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !2
// CHECK: %[[VAL_31:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !3
// CHECK: %[[VAL_32:.*]] = mul nuw nsw i32 %[[VAL_30]], 256
// CHECK: %[[VAL_33:.*]] = add nuw nsw i32 %[[VAL_32]], %[[VAL_31]]
// CHECK: %[[VAL_34:.*]] = icmp ult i32 %[[VAL_33]], 25690112
// CHECK: call void @llvm.assume(i1 %[[VAL_34]])
// CHECK: %[[VAL_35:.*]] = mul nuw nsw i32 %[[VAL_33]], 4
// CHECK: %[[VAL_36:.*]] = udiv i32 %[[VAL_35]], 1
// CHECK: %[[VAL_37:.*]] = urem i32 %[[VAL_36]], 64
// CHECK: %[[VAL_38:.*]] = udiv i32 %[[VAL_35]], 64
// CHECK: %[[VAL_39:.*]] = urem i32 %[[VAL_38]], 112
// CHECK: %[[VAL_40:.*]] = udiv i32 %[[VAL_35]], 7168
// CHECK: %[[VAL_30:.*]] = getelementptr inbounds i8, i8* %[[VAL_28]], i64 0
// CHECK: %[[VAL_31:.*]] = bitcast i8* %[[VAL_30]] to [128 x [112 x [112 x [64 x half]]]]*
// CHECK: %[[VAL_32:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !2
// CHECK: %[[VAL_33:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !3
// CHECK: %[[VAL_34:.*]] = mul nuw nsw i32 %[[VAL_32]], 256
// CHECK: %[[VAL_35:.*]] = add nuw nsw i32 %[[VAL_34]], %[[VAL_33]]
// CHECK: %[[VAL_36:.*]] = icmp ult i32 %[[VAL_35]], 25690112
// CHECK: call void @llvm.assume(i1 %[[VAL_36]])
// CHECK: %[[VAL_37:.*]] = mul nuw nsw i32 %[[VAL_35]], 4
// CHECK: %[[VAL_38:.*]] = udiv i32 %[[VAL_37]], 1
// CHECK: %[[VAL_39:.*]] = urem i32 %[[VAL_38]], 64
// CHECK: %[[VAL_40:.*]] = udiv i32 %[[VAL_37]], 64
// CHECK: %[[VAL_41:.*]] = urem i32 %[[VAL_40]], 112
// CHECK: %[[VAL_42:.*]] = udiv i32 %[[VAL_35]], 802816
// CHECK: %[[VAL_43:.*]] = add nuw nsw i32 %[[VAL_35]], 1
// CHECK: %[[VAL_44:.*]] = udiv i32 %[[VAL_43]], 1
// CHECK: %[[VAL_45:.*]] = urem i32 %[[VAL_44]], 64
// CHECK: %[[VAL_46:.*]] = udiv i32 %[[VAL_43]], 64
// CHECK: %[[VAL_47:.*]] = urem i32 %[[VAL_46]], 112
// CHECK: %[[VAL_48:.*]] = udiv i32 %[[VAL_43]], 7168
// CHECK: %[[VAL_42:.*]] = udiv i32 %[[VAL_37]], 7168
// CHECK: %[[VAL_43:.*]] = urem i32 %[[VAL_42]], 112
// CHECK: %[[VAL_44:.*]] = udiv i32 %[[VAL_37]], 802816
// CHECK: %[[VAL_45:.*]] = add nuw nsw i32 %[[VAL_37]], 1
// CHECK: %[[VAL_46:.*]] = udiv i32 %[[VAL_45]], 1
// CHECK: %[[VAL_47:.*]] = urem i32 %[[VAL_46]], 64
// CHECK: %[[VAL_48:.*]] = udiv i32 %[[VAL_45]], 64
// CHECK: %[[VAL_49:.*]] = urem i32 %[[VAL_48]], 112
// CHECK: %[[VAL_50:.*]] = udiv i32 %[[VAL_43]], 802816
// CHECK: %[[VAL_51:.*]] = add nuw nsw i32 %[[VAL_35]], 2
// CHECK: %[[VAL_52:.*]] = udiv i32 %[[VAL_51]], 1
// CHECK: %[[VAL_53:.*]] = urem i32 %[[VAL_52]], 64
// CHECK: %[[VAL_54:.*]] = udiv i32 %[[VAL_51]], 64
// CHECK: %[[VAL_55:.*]] = urem i32 %[[VAL_54]], 112
// CHECK: %[[VAL_56:.*]] = udiv i32 %[[VAL_51]], 7168
// CHECK: %[[VAL_50:.*]] = udiv i32 %[[VAL_45]], 7168
// CHECK: %[[VAL_51:.*]] = urem i32 %[[VAL_50]], 112
// CHECK: %[[VAL_52:.*]] = udiv i32 %[[VAL_45]], 802816
// CHECK: %[[VAL_53:.*]] = add nuw nsw i32 %[[VAL_37]], 2
// CHECK: %[[VAL_54:.*]] = udiv i32 %[[VAL_53]], 1
// CHECK: %[[VAL_55:.*]] = urem i32 %[[VAL_54]], 64
// CHECK: %[[VAL_56:.*]] = udiv i32 %[[VAL_53]], 64
// CHECK: %[[VAL_57:.*]] = urem i32 %[[VAL_56]], 112
// CHECK: %[[VAL_58:.*]] = udiv i32 %[[VAL_51]], 802816
// CHECK: %[[VAL_59:.*]] = add nuw nsw i32 %[[VAL_35]], 3
// CHECK: %[[VAL_60:.*]] = udiv i32 %[[VAL_59]], 1
// CHECK: %[[VAL_61:.*]] = urem i32 %[[VAL_60]], 64
// CHECK: %[[VAL_62:.*]] = udiv i32 %[[VAL_59]], 64
// CHECK: %[[VAL_63:.*]] = urem i32 %[[VAL_62]], 112
// CHECK: %[[VAL_64:.*]] = udiv i32 %[[VAL_59]], 7168
// CHECK: %[[VAL_58:.*]] = udiv i32 %[[VAL_53]], 7168
// CHECK: %[[VAL_59:.*]] = urem i32 %[[VAL_58]], 112
// CHECK: %[[VAL_60:.*]] = udiv i32 %[[VAL_53]], 802816
// CHECK: %[[VAL_61:.*]] = add nuw nsw i32 %[[VAL_37]], 3
// CHECK: %[[VAL_62:.*]] = udiv i32 %[[VAL_61]], 1
// CHECK: %[[VAL_63:.*]] = urem i32 %[[VAL_62]], 64
// CHECK: %[[VAL_64:.*]] = udiv i32 %[[VAL_61]], 64
// CHECK: %[[VAL_65:.*]] = urem i32 %[[VAL_64]], 112
// CHECK: %[[VAL_66:.*]] = udiv i32 %[[VAL_59]], 802816
// CHECK: %[[VAL_67:.*]] = icmp ult i32 %[[VAL_35]], 102760448
// CHECK: br i1 %[[VAL_67]], label %[[VAL_70:.*]], label %[[VAL_71:.*]]
// CHECK: %[[VAL_66:.*]] = udiv i32 %[[VAL_61]], 7168
// CHECK: %[[VAL_67:.*]] = urem i32 %[[VAL_66]], 112
// CHECK: %[[VAL_68:.*]] = udiv i32 %[[VAL_61]], 802816
// CHECK: %[[VAL_69:.*]] = icmp ult i32 %[[VAL_37]], 102760448
// CHECK: br i1 %[[VAL_69]], label %[[VAL_70:.*]], label %[[VAL_71:.*]]
// CHECK: fusion.1.in_bounds-after: ; preds = %[[VAL_70]], %[[VAL_72:.*]]
// CHECK: ret void
// CHECK: fusion.1.in_bounds-true: ; preds = %[[VAL_72]]
// CHECK: %[[VAL_73:.*]] = urem i32 %[[VAL_35]], 64
// CHECK: %[[VAL_74:.*]] = bitcast [64 x float]* %[[VAL_17]] to float*
// CHECK: %[[VAL_73:.*]] = urem i32 %[[VAL_37]], 64
// CHECK: %[[VAL_74:.*]] = bitcast [64 x float]* %[[VAL_14]] to float*
// CHECK: %[[VAL_75:.*]] = getelementptr inbounds float, float* %[[VAL_74]], i32 %[[VAL_73]]
// CHECK: %[[VAL_76:.*]] = load float, float* %[[VAL_75]], align 4, !invariant.load !4
// CHECK: %[[VAL_77:.*]] = urem i32 %[[VAL_35]], 64
// CHECK: %[[VAL_78:.*]] = bitcast [64 x float]* %[[VAL_14]] to float*
// CHECK: %[[VAL_77:.*]] = urem i32 %[[VAL_37]], 64
// CHECK: %[[VAL_78:.*]] = bitcast [64 x float]* %[[VAL_11]] to float*
// CHECK: %[[VAL_79:.*]] = getelementptr inbounds float, float* %[[VAL_78]], i32 %[[VAL_77]]
// CHECK: %[[VAL_80:.*]] = load float, float* %[[VAL_79]], align 4, !invariant.load !4
// CHECK: %[[VAL_81:.*]] = fmul float %[[VAL_76]], %[[VAL_80]]
// CHECK: %[[VAL_82:.*]] = load float, float* bitcast ([4 x i8]* @0 to float*), align 4
// CHECK: %[[VAL_83:.*]] = fmul float %[[VAL_81]], %[[VAL_82]]
// CHECK: %[[VAL_84:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_29]] to half*
// CHECK: %[[VAL_85:.*]] = getelementptr inbounds half, half* %[[VAL_84]], i32 %[[VAL_35]]
// CHECK: %[[VAL_84:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_26]] to half*
// CHECK: %[[VAL_85:.*]] = getelementptr inbounds half, half* %[[VAL_84]], i32 %[[VAL_37]]
// CHECK: %[[VAL_86:.*]] = load half, half* %[[VAL_85]], align 2, !invariant.load !4
// CHECK: %[[VAL_87:.*]] = load half, half* bitcast ([2 x i8]* @1 to half*), align 2
// CHECK: %[[VAL_88:.*]] = fcmp ogt half %[[VAL_86]], %[[VAL_87]]
// CHECK: %[[VAL_89:.*]] = zext i1 %[[VAL_88]] to i8
// CHECK: %[[VAL_90:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_26]] to half*
// CHECK: %[[VAL_91:.*]] = getelementptr inbounds half, half* %[[VAL_90]], i32 %[[VAL_35]]
// CHECK: %[[VAL_90:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_23]] to half*
// CHECK: %[[VAL_91:.*]] = getelementptr inbounds half, half* %[[VAL_90]], i32 %[[VAL_37]]
// CHECK: %[[VAL_92:.*]] = load half, half* %[[VAL_91]], align 2, !invariant.load !4
// CHECK: %[[VAL_93:.*]] = trunc i8 %[[VAL_89]] to i1
// CHECK: %[[VAL_94:.*]] = select i1 %[[VAL_93]], half %[[VAL_92]], half %[[VAL_87]]
// CHECK: %[[VAL_95:.*]] = fpext half %[[VAL_94]] to float
// CHECK: %[[VAL_96:.*]] = load float, float* bitcast ([4 x i8]* @2 to float*), align 4
// CHECK: %[[VAL_97:.*]] = fmul float %[[VAL_95]], %[[VAL_96]]
// CHECK: %[[VAL_98:.*]] = urem i32 %[[VAL_35]], 64
// CHECK: %[[VAL_99:.*]] = bitcast [64 x float]* %[[VAL_11]] to float*
// CHECK: %[[VAL_98:.*]] = urem i32 %[[VAL_37]], 64
// CHECK: %[[VAL_99:.*]] = bitcast [64 x float]* %[[VAL_8]] to float*
// CHECK: %[[VAL_100:.*]] = getelementptr inbounds float, float* %[[VAL_99]], i32 %[[VAL_98]]
// CHECK: %[[VAL_101:.*]] = load float, float* %[[VAL_100]], align 4, !invariant.load !4
// CHECK: %[[VAL_102:.*]] = fsub float %[[VAL_97]], %[[VAL_101]]
// CHECK: %[[VAL_103:.*]] = urem i32 %[[VAL_35]], 64
// CHECK: %[[VAL_104:.*]] = bitcast [64 x float]* %[[VAL_8]] to float*
// CHECK: %[[VAL_103:.*]] = urem i32 %[[VAL_37]], 64
// CHECK: %[[VAL_104:.*]] = bitcast [64 x float]* %[[VAL_5]] to float*
// CHECK: %[[VAL_105:.*]] = getelementptr inbounds float, float* %[[VAL_104]], i32 %[[VAL_103]]
// CHECK: %[[VAL_106:.*]] = load float, float* %[[VAL_105]], align 4, !invariant.load !4
// CHECK: %[[VAL_107:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_23]] to half*
// CHECK: %[[VAL_108:.*]] = getelementptr inbounds half, half* %[[VAL_107]], i32 %[[VAL_35]]
// CHECK: %[[VAL_107:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_20]] to half*
// CHECK: %[[VAL_108:.*]] = getelementptr inbounds half, half* %[[VAL_107]], i32 %[[VAL_37]]
// CHECK: %[[VAL_109:.*]] = load half, half* %[[VAL_108]], align 2, !invariant.load !4
// CHECK: %[[VAL_110:.*]] = fpext half %[[VAL_109]] to float
// CHECK: %[[VAL_111:.*]] = urem i32 %[[VAL_35]], 64
// CHECK: %[[VAL_112:.*]] = bitcast [64 x float]* %[[VAL_20]] to float*
// CHECK: %[[VAL_111:.*]] = urem i32 %[[VAL_37]], 64
// CHECK: %[[VAL_112:.*]] = bitcast [64 x float]* %[[VAL_17]] to float*
// CHECK: %[[VAL_113:.*]] = getelementptr inbounds float, float* %[[VAL_112]], i32 %[[VAL_111]]
// CHECK: %[[VAL_114:.*]] = load float, float* %[[VAL_113]], align 4, !invariant.load !4
// CHECK: %[[VAL_115:.*]] = load float, float* bitcast ([4 x i8]* @3 to float*), align 4
// CHECK: %[[VAL_116:.*]] = fmul float %[[VAL_114]], %[[VAL_115]]
// CHECK: %[[VAL_117:.*]] = fsub float %[[VAL_110]], %[[VAL_116]]
// CHECK: %[[VAL_118:.*]] = fmul float %[[VAL_106]], %[[VAL_117]]
// CHECK: %[[VAL_119:.*]] = urem i32 %[[VAL_35]], 64
// CHECK: %[[VAL_120:.*]] = bitcast [64 x float]* %[[VAL_5]] to float*
// CHECK: %[[VAL_119:.*]] = urem i32 %[[VAL_37]], 64
// CHECK: %[[VAL_120:.*]] = bitcast [64 x float]* %[[VAL_2]] to float*
// CHECK: %[[VAL_121:.*]] = getelementptr inbounds float, float* %[[VAL_120]], i32 %[[VAL_119]]
// CHECK: %[[VAL_122:.*]] = load float, float* %[[VAL_121]], align 4, !invariant.load !4
// CHECK: %[[VAL_123:.*]] = fdiv float %[[VAL_118]], %[[VAL_122]]
// CHECK: %[[VAL_124:.*]] = fsub float %[[VAL_102]], %[[VAL_123]]
// CHECK: %[[VAL_125:.*]] = fmul float %[[VAL_83]], %[[VAL_124]]
// CHECK: %[[VAL_126:.*]] = fptrunc float %[[VAL_125]] to half
// CHECK: %[[VAL_127:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_2]] to half*
// CHECK: %[[VAL_128:.*]] = getelementptr inbounds half, half* %[[VAL_127]], i32 %[[VAL_35]]
// CHECK: %[[VAL_127:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_29]] to half*
// CHECK: %[[VAL_128:.*]] = getelementptr inbounds half, half* %[[VAL_127]], i32 %[[VAL_37]]
// CHECK: store half %[[VAL_126]], half* %[[VAL_128]], align 2
// CHECK: %[[VAL_129:.*]] = urem i32 %[[VAL_43]], 64
// CHECK: %[[VAL_130:.*]] = bitcast [64 x float]* %[[VAL_17]] to float*
// CHECK: %[[VAL_129:.*]] = urem i32 %[[VAL_45]], 64
// CHECK: %[[VAL_130:.*]] = bitcast [64 x float]* %[[VAL_14]] to float*
// CHECK: %[[VAL_131:.*]] = getelementptr inbounds float, float* %[[VAL_130]], i32 %[[VAL_129]]
// CHECK: %[[VAL_132:.*]] = load float, float* %[[VAL_131]], align 4, !invariant.load !4
// CHECK: %[[VAL_133:.*]] = urem i32 %[[VAL_43]], 64
// CHECK: %[[VAL_134:.*]] = bitcast [64 x float]* %[[VAL_14]] to float*
// CHECK: %[[VAL_133:.*]] = urem i32 %[[VAL_45]], 64
// CHECK: %[[VAL_134:.*]] = bitcast [64 x float]* %[[VAL_11]] to float*
// CHECK: %[[VAL_135:.*]] = getelementptr inbounds float, float* %[[VAL_134]], i32 %[[VAL_133]]
// CHECK: %[[VAL_136:.*]] = load float, float* %[[VAL_135]], align 4, !invariant.load !4
// CHECK: %[[VAL_137:.*]] = fmul float %[[VAL_132]], %[[VAL_136]]
// CHECK: %[[VAL_138:.*]] = load float, float* bitcast ([4 x i8]* @4 to float*), align 4
// CHECK: %[[VAL_139:.*]] = fmul float %[[VAL_137]], %[[VAL_138]]
// CHECK: %[[VAL_140:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_29]] to half*
// CHECK: %[[VAL_141:.*]] = getelementptr inbounds half, half* %[[VAL_140]], i32 %[[VAL_43]]
// CHECK: %[[VAL_140:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_26]] to half*
// CHECK: %[[VAL_141:.*]] = getelementptr inbounds half, half* %[[VAL_140]], i32 %[[VAL_45]]
// CHECK: %[[VAL_142:.*]] = load half, half* %[[VAL_141]], align 2, !invariant.load !4
// CHECK: %[[VAL_143:.*]] = load half, half* bitcast ([2 x i8]* @5 to half*), align 2
// CHECK: %[[VAL_144:.*]] = fcmp ogt half %[[VAL_142]], %[[VAL_143]]
// CHECK: %[[VAL_145:.*]] = zext i1 %[[VAL_144]] to i8
// CHECK: %[[VAL_146:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_26]] to half*
// CHECK: %[[VAL_147:.*]] = getelementptr inbounds half, half* %[[VAL_146]], i32 %[[VAL_43]]
// CHECK: %[[VAL_146:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_23]] to half*
// CHECK: %[[VAL_147:.*]] = getelementptr inbounds half, half* %[[VAL_146]], i32 %[[VAL_45]]
// CHECK: %[[VAL_148:.*]] = load half, half* %[[VAL_147]], align 2, !invariant.load !4
// CHECK: %[[VAL_149:.*]] = trunc i8 %[[VAL_145]] to i1
// CHECK: %[[VAL_150:.*]] = select i1 %[[VAL_149]], half %[[VAL_148]], half %[[VAL_143]]
// CHECK: %[[VAL_151:.*]] = fpext half %[[VAL_150]] to float
// CHECK: %[[VAL_152:.*]] = load float, float* bitcast ([4 x i8]* @6 to float*), align 4
// CHECK: %[[VAL_153:.*]] = fmul float %[[VAL_151]], %[[VAL_152]]
// CHECK: %[[VAL_154:.*]] = urem i32 %[[VAL_43]], 64
// CHECK: %[[VAL_155:.*]] = bitcast [64 x float]* %[[VAL_11]] to float*
// CHECK: %[[VAL_154:.*]] = urem i32 %[[VAL_45]], 64
// CHECK: %[[VAL_155:.*]] = bitcast [64 x float]* %[[VAL_8]] to float*
// CHECK: %[[VAL_156:.*]] = getelementptr inbounds float, float* %[[VAL_155]], i32 %[[VAL_154]]
// CHECK: %[[VAL_157:.*]] = load float, float* %[[VAL_156]], align 4, !invariant.load !4
// CHECK: %[[VAL_158:.*]] = fsub float %[[VAL_153]], %[[VAL_157]]
// CHECK: %[[VAL_159:.*]] = urem i32 %[[VAL_43]], 64
// CHECK: %[[VAL_160:.*]] = bitcast [64 x float]* %[[VAL_8]] to float*
// CHECK: %[[VAL_159:.*]] = urem i32 %[[VAL_45]], 64
// CHECK: %[[VAL_160:.*]] = bitcast [64 x float]* %[[VAL_5]] to float*
// CHECK: %[[VAL_161:.*]] = getelementptr inbounds float, float* %[[VAL_160]], i32 %[[VAL_159]]
// CHECK: %[[VAL_162:.*]] = load float, float* %[[VAL_161]], align 4, !invariant.load !4
// CHECK: %[[VAL_163:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_23]] to half*
// CHECK: %[[VAL_164:.*]] = getelementptr inbounds half, half* %[[VAL_163]], i32 %[[VAL_43]]
// CHECK: %[[VAL_163:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_20]] to half*
// CHECK: %[[VAL_164:.*]] = getelementptr inbounds half, half* %[[VAL_163]], i32 %[[VAL_45]]
// CHECK: %[[VAL_165:.*]] = load half, half* %[[VAL_164]], align 2, !invariant.load !4
// CHECK: %[[VAL_166:.*]] = fpext half %[[VAL_165]] to float
// CHECK: %[[VAL_167:.*]] = urem i32 %[[VAL_43]], 64
// CHECK: %[[VAL_168:.*]] = bitcast [64 x float]* %[[VAL_20]] to float*
// CHECK: %[[VAL_167:.*]] = urem i32 %[[VAL_45]], 64
// CHECK: %[[VAL_168:.*]] = bitcast [64 x float]* %[[VAL_17]] to float*
// CHECK: %[[VAL_169:.*]] = getelementptr inbounds float, float* %[[VAL_168]], i32 %[[VAL_167]]
// CHECK: %[[VAL_170:.*]] = load float, float* %[[VAL_169]], align 4, !invariant.load !4
// CHECK: %[[VAL_171:.*]] = load float, float* bitcast ([4 x i8]* @7 to float*), align 4
// CHECK: %[[VAL_172:.*]] = fmul float %[[VAL_170]], %[[VAL_171]]
// CHECK: %[[VAL_173:.*]] = fsub float %[[VAL_166]], %[[VAL_172]]
// CHECK: %[[VAL_174:.*]] = fmul float %[[VAL_162]], %[[VAL_173]]
// CHECK: %[[VAL_175:.*]] = urem i32 %[[VAL_43]], 64
// CHECK: %[[VAL_176:.*]] = bitcast [64 x float]* %[[VAL_5]] to float*
// CHECK: %[[VAL_175:.*]] = urem i32 %[[VAL_45]], 64
// CHECK: %[[VAL_176:.*]] = bitcast [64 x float]* %[[VAL_2]] to float*
// CHECK: %[[VAL_177:.*]] = getelementptr inbounds float, float* %[[VAL_176]], i32 %[[VAL_175]]
// CHECK: %[[VAL_178:.*]] = load float, float* %[[VAL_177]], align 4, !invariant.load !4
// CHECK: %[[VAL_179:.*]] = fdiv float %[[VAL_174]], %[[VAL_178]]
// CHECK: %[[VAL_180:.*]] = fsub float %[[VAL_158]], %[[VAL_179]]
// CHECK: %[[VAL_181:.*]] = fmul float %[[VAL_139]], %[[VAL_180]]
// CHECK: %[[VAL_182:.*]] = fptrunc float %[[VAL_181]] to half
// CHECK: %[[VAL_183:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_2]] to half*
// CHECK: %[[VAL_184:.*]] = getelementptr inbounds half, half* %[[VAL_183]], i32 %[[VAL_43]]
// CHECK: %[[VAL_183:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_29]] to half*
// CHECK: %[[VAL_184:.*]] = getelementptr inbounds half, half* %[[VAL_183]], i32 %[[VAL_45]]
// CHECK: store half %[[VAL_182]], half* %[[VAL_184]], align 2
// CHECK: %[[VAL_185:.*]] = urem i32 %[[VAL_51]], 64
// CHECK: %[[VAL_186:.*]] = bitcast [64 x float]* %[[VAL_17]] to float*
// CHECK: %[[VAL_185:.*]] = urem i32 %[[VAL_53]], 64
// CHECK: %[[VAL_186:.*]] = bitcast [64 x float]* %[[VAL_14]] to float*
// CHECK: %[[VAL_187:.*]] = getelementptr inbounds float, float* %[[VAL_186]], i32 %[[VAL_185]]
// CHECK: %[[VAL_188:.*]] = load float, float* %[[VAL_187]], align 4, !invariant.load !4
// CHECK: %[[VAL_189:.*]] = urem i32 %[[VAL_51]], 64
// CHECK: %[[VAL_190:.*]] = bitcast [64 x float]* %[[VAL_14]] to float*
// CHECK: %[[VAL_189:.*]] = urem i32 %[[VAL_53]], 64
// CHECK: %[[VAL_190:.*]] = bitcast [64 x float]* %[[VAL_11]] to float*
// CHECK: %[[VAL_191:.*]] = getelementptr inbounds float, float* %[[VAL_190]], i32 %[[VAL_189]]
// CHECK: %[[VAL_192:.*]] = load float, float* %[[VAL_191]], align 4, !invariant.load !4
// CHECK: %[[VAL_193:.*]] = fmul float %[[VAL_188]], %[[VAL_192]]
// CHECK: %[[VAL_194:.*]] = load float, float* bitcast ([4 x i8]* @8 to float*), align 4
// CHECK: %[[VAL_195:.*]] = fmul float %[[VAL_193]], %[[VAL_194]]
// CHECK: %[[VAL_196:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_29]] to half*
// CHECK: %[[VAL_197:.*]] = getelementptr inbounds half, half* %[[VAL_196]], i32 %[[VAL_51]]
// CHECK: %[[VAL_196:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_26]] to half*
// CHECK: %[[VAL_197:.*]] = getelementptr inbounds half, half* %[[VAL_196]], i32 %[[VAL_53]]
// CHECK: %[[VAL_198:.*]] = load half, half* %[[VAL_197]], align 2, !invariant.load !4
// CHECK: %[[VAL_199:.*]] = load half, half* bitcast ([2 x i8]* @9 to half*), align 2
// CHECK: %[[VAL_200:.*]] = fcmp ogt half %[[VAL_198]], %[[VAL_199]]
// CHECK: %[[VAL_201:.*]] = zext i1 %[[VAL_200]] to i8
// CHECK: %[[VAL_202:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_26]] to half*
// CHECK: %[[VAL_203:.*]] = getelementptr inbounds half, half* %[[VAL_202]], i32 %[[VAL_51]]
// CHECK: %[[VAL_202:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_23]] to half*
// CHECK: %[[VAL_203:.*]] = getelementptr inbounds half, half* %[[VAL_202]], i32 %[[VAL_53]]
// CHECK: %[[VAL_204:.*]] = load half, half* %[[VAL_203]], align 2, !invariant.load !4
// CHECK: %[[VAL_205:.*]] = trunc i8 %[[VAL_201]] to i1
// CHECK: %[[VAL_206:.*]] = select i1 %[[VAL_205]], half %[[VAL_204]], half %[[VAL_199]]
// CHECK: %[[VAL_207:.*]] = fpext half %[[VAL_206]] to float
// CHECK: %[[VAL_208:.*]] = load float, float* bitcast ([4 x i8]* @10 to float*), align 4
// CHECK: %[[VAL_209:.*]] = fmul float %[[VAL_207]], %[[VAL_208]]
// CHECK: %[[VAL_210:.*]] = urem i32 %[[VAL_51]], 64
// CHECK: %[[VAL_211:.*]] = bitcast [64 x float]* %[[VAL_11]] to float*
// CHECK: %[[VAL_210:.*]] = urem i32 %[[VAL_53]], 64
// CHECK: %[[VAL_211:.*]] = bitcast [64 x float]* %[[VAL_8]] to float*
// CHECK: %[[VAL_212:.*]] = getelementptr inbounds float, float* %[[VAL_211]], i32 %[[VAL_210]]
// CHECK: %[[VAL_213:.*]] = load float, float* %[[VAL_212]], align 4, !invariant.load !4
// CHECK: %[[VAL_214:.*]] = fsub float %[[VAL_209]], %[[VAL_213]]
// CHECK: %[[VAL_215:.*]] = urem i32 %[[VAL_51]], 64
// CHECK: %[[VAL_216:.*]] = bitcast [64 x float]* %[[VAL_8]] to float*
// CHECK: %[[VAL_215:.*]] = urem i32 %[[VAL_53]], 64
// CHECK: %[[VAL_216:.*]] = bitcast [64 x float]* %[[VAL_5]] to float*
// CHECK: %[[VAL_217:.*]] = getelementptr inbounds float, float* %[[VAL_216]], i32 %[[VAL_215]]
// CHECK: %[[VAL_218:.*]] = load float, float* %[[VAL_217]], align 4, !invariant.load !4
// CHECK: %[[VAL_219:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_23]] to half*
// CHECK: %[[VAL_220:.*]] = getelementptr inbounds half, half* %[[VAL_219]], i32 %[[VAL_51]]
// CHECK: %[[VAL_219:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_20]] to half*
// CHECK: %[[VAL_220:.*]] = getelementptr inbounds half, half* %[[VAL_219]], i32 %[[VAL_53]]
// CHECK: %[[VAL_221:.*]] = load half, half* %[[VAL_220]], align 2, !invariant.load !4
// CHECK: %[[VAL_222:.*]] = fpext half %[[VAL_221]] to float
// CHECK: %[[VAL_223:.*]] = urem i32 %[[VAL_51]], 64
// CHECK: %[[VAL_224:.*]] = bitcast [64 x float]* %[[VAL_20]] to float*
// CHECK: %[[VAL_223:.*]] = urem i32 %[[VAL_53]], 64
// CHECK: %[[VAL_224:.*]] = bitcast [64 x float]* %[[VAL_17]] to float*
// CHECK: %[[VAL_225:.*]] = getelementptr inbounds float, float* %[[VAL_224]], i32 %[[VAL_223]]
// CHECK: %[[VAL_226:.*]] = load float, float* %[[VAL_225]], align 4, !invariant.load !4
// CHECK: %[[VAL_227:.*]] = load float, float* bitcast ([4 x i8]* @11 to float*), align 4
// CHECK: %[[VAL_228:.*]] = fmul float %[[VAL_226]], %[[VAL_227]]
// CHECK: %[[VAL_229:.*]] = fsub float %[[VAL_222]], %[[VAL_228]]
// CHECK: %[[VAL_230:.*]] = fmul float %[[VAL_218]], %[[VAL_229]]
// CHECK: %[[VAL_231:.*]] = urem i32 %[[VAL_51]], 64
// CHECK: %[[VAL_232:.*]] = bitcast [64 x float]* %[[VAL_5]] to float*
// CHECK: %[[VAL_231:.*]] = urem i32 %[[VAL_53]], 64
// CHECK: %[[VAL_232:.*]] = bitcast [64 x float]* %[[VAL_2]] to float*
// CHECK: %[[VAL_233:.*]] = getelementptr inbounds float, float* %[[VAL_232]], i32 %[[VAL_231]]
// CHECK: %[[VAL_234:.*]] = load float, float* %[[VAL_233]], align 4, !invariant.load !4
// CHECK: %[[VAL_235:.*]] = fdiv float %[[VAL_230]], %[[VAL_234]]
// CHECK: %[[VAL_236:.*]] = fsub float %[[VAL_214]], %[[VAL_235]]
// CHECK: %[[VAL_237:.*]] = fmul float %[[VAL_195]], %[[VAL_236]]
// CHECK: %[[VAL_238:.*]] = fptrunc float %[[VAL_237]] to half
// CHECK: %[[VAL_239:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_2]] to half*
// CHECK: %[[VAL_240:.*]] = getelementptr inbounds half, half* %[[VAL_239]], i32 %[[VAL_51]]
// CHECK: %[[VAL_239:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_29]] to half*
// CHECK: %[[VAL_240:.*]] = getelementptr inbounds half, half* %[[VAL_239]], i32 %[[VAL_53]]
// CHECK: store half %[[VAL_238]], half* %[[VAL_240]], align 2
// CHECK: %[[VAL_241:.*]] = urem i32 %[[VAL_59]], 64
// CHECK: %[[VAL_242:.*]] = bitcast [64 x float]* %[[VAL_17]] to float*
// CHECK: %[[VAL_241:.*]] = urem i32 %[[VAL_61]], 64
// CHECK: %[[VAL_242:.*]] = bitcast [64 x float]* %[[VAL_14]] to float*
// CHECK: %[[VAL_243:.*]] = getelementptr inbounds float, float* %[[VAL_242]], i32 %[[VAL_241]]
// CHECK: %[[VAL_244:.*]] = load float, float* %[[VAL_243]], align 4, !invariant.load !4
// CHECK: %[[VAL_245:.*]] = urem i32 %[[VAL_59]], 64
// CHECK: %[[VAL_246:.*]] = bitcast [64 x float]* %[[VAL_14]] to float*
// CHECK: %[[VAL_245:.*]] = urem i32 %[[VAL_61]], 64
// CHECK: %[[VAL_246:.*]] = bitcast [64 x float]* %[[VAL_11]] to float*
// CHECK: %[[VAL_247:.*]] = getelementptr inbounds float, float* %[[VAL_246]], i32 %[[VAL_245]]
// CHECK: %[[VAL_248:.*]] = load float, float* %[[VAL_247]], align 4, !invariant.load !4
// CHECK: %[[VAL_249:.*]] = fmul float %[[VAL_244]], %[[VAL_248]]
// CHECK: %[[VAL_250:.*]] = load float, float* bitcast ([4 x i8]* @12 to float*), align 4
// CHECK: %[[VAL_251:.*]] = fmul float %[[VAL_249]], %[[VAL_250]]
// CHECK: %[[VAL_252:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_29]] to half*
// CHECK: %[[VAL_253:.*]] = getelementptr inbounds half, half* %[[VAL_252]], i32 %[[VAL_59]]
// CHECK: %[[VAL_252:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_26]] to half*
// CHECK: %[[VAL_253:.*]] = getelementptr inbounds half, half* %[[VAL_252]], i32 %[[VAL_61]]
// CHECK: %[[VAL_254:.*]] = load half, half* %[[VAL_253]], align 2, !invariant.load !4
// CHECK: %[[VAL_255:.*]] = load half, half* bitcast ([2 x i8]* @13 to half*), align 2
// CHECK: %[[VAL_256:.*]] = fcmp ogt half %[[VAL_254]], %[[VAL_255]]
// CHECK: %[[VAL_257:.*]] = zext i1 %[[VAL_256]] to i8
// CHECK: %[[VAL_258:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_26]] to half*
// CHECK: %[[VAL_259:.*]] = getelementptr inbounds half, half* %[[VAL_258]], i32 %[[VAL_59]]
// CHECK: %[[VAL_258:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_23]] to half*
// CHECK: %[[VAL_259:.*]] = getelementptr inbounds half, half* %[[VAL_258]], i32 %[[VAL_61]]
// CHECK: %[[VAL_260:.*]] = load half, half* %[[VAL_259]], align 2, !invariant.load !4
// CHECK: %[[VAL_261:.*]] = trunc i8 %[[VAL_257]] to i1
// CHECK: %[[VAL_262:.*]] = select i1 %[[VAL_261]], half %[[VAL_260]], half %[[VAL_255]]
// CHECK: %[[VAL_263:.*]] = fpext half %[[VAL_262]] to float
// CHECK: %[[VAL_264:.*]] = load float, float* bitcast ([4 x i8]* @14 to float*), align 4
// CHECK: %[[VAL_265:.*]] = fmul float %[[VAL_263]], %[[VAL_264]]
// CHECK: %[[VAL_266:.*]] = urem i32 %[[VAL_59]], 64
// CHECK: %[[VAL_267:.*]] = bitcast [64 x float]* %[[VAL_11]] to float*
// CHECK: %[[VAL_266:.*]] = urem i32 %[[VAL_61]], 64
// CHECK: %[[VAL_267:.*]] = bitcast [64 x float]* %[[VAL_8]] to float*
// CHECK: %[[VAL_268:.*]] = getelementptr inbounds float, float* %[[VAL_267]], i32 %[[VAL_266]]
// CHECK: %[[VAL_269:.*]] = load float, float* %[[VAL_268]], align 4, !invariant.load !4
// CHECK: %[[VAL_270:.*]] = fsub float %[[VAL_265]], %[[VAL_269]]
// CHECK: %[[VAL_271:.*]] = urem i32 %[[VAL_59]], 64
// CHECK: %[[VAL_272:.*]] = bitcast [64 x float]* %[[VAL_8]] to float*
// CHECK: %[[VAL_271:.*]] = urem i32 %[[VAL_61]], 64
// CHECK: %[[VAL_272:.*]] = bitcast [64 x float]* %[[VAL_5]] to float*
// CHECK: %[[VAL_273:.*]] = getelementptr inbounds float, float* %[[VAL_272]], i32 %[[VAL_271]]
// CHECK: %[[VAL_274:.*]] = load float, float* %[[VAL_273]], align 4, !invariant.load !4
// CHECK: %[[VAL_275:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_23]] to half*
// CHECK: %[[VAL_276:.*]] = getelementptr inbounds half, half* %[[VAL_275]], i32 %[[VAL_59]]
// CHECK: %[[VAL_275:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_20]] to half*
// CHECK: %[[VAL_276:.*]] = getelementptr inbounds half, half* %[[VAL_275]], i32 %[[VAL_61]]
// CHECK: %[[VAL_277:.*]] = load half, half* %[[VAL_276]], align 2, !invariant.load !4
// CHECK: %[[VAL_278:.*]] = fpext half %[[VAL_277]] to float
// CHECK: %[[VAL_279:.*]] = urem i32 %[[VAL_59]], 64
// CHECK: %[[VAL_280:.*]] = bitcast [64 x float]* %[[VAL_20]] to float*
// CHECK: %[[VAL_279:.*]] = urem i32 %[[VAL_61]], 64
// CHECK: %[[VAL_280:.*]] = bitcast [64 x float]* %[[VAL_17]] to float*
// CHECK: %[[VAL_281:.*]] = getelementptr inbounds float, float* %[[VAL_280]], i32 %[[VAL_279]]
// CHECK: %[[VAL_282:.*]] = load float, float* %[[VAL_281]], align 4, !invariant.load !4
// CHECK: %[[VAL_283:.*]] = load float, float* bitcast ([4 x i8]* @15 to float*), align 4
// CHECK: %[[VAL_284:.*]] = fmul float %[[VAL_282]], %[[VAL_283]]
// CHECK: %[[VAL_285:.*]] = fsub float %[[VAL_278]], %[[VAL_284]]
// CHECK: %[[VAL_286:.*]] = fmul float %[[VAL_274]], %[[VAL_285]]
// CHECK: %[[VAL_287:.*]] = urem i32 %[[VAL_59]], 64
// CHECK: %[[VAL_288:.*]] = bitcast [64 x float]* %[[VAL_5]] to float*
// CHECK: %[[VAL_287:.*]] = urem i32 %[[VAL_61]], 64
// CHECK: %[[VAL_288:.*]] = bitcast [64 x float]* %[[VAL_2]] to float*
// CHECK: %[[VAL_289:.*]] = getelementptr inbounds float, float* %[[VAL_288]], i32 %[[VAL_287]]
// CHECK: %[[VAL_290:.*]] = load float, float* %[[VAL_289]], align 4, !invariant.load !4
// CHECK: %[[VAL_291:.*]] = fdiv float %[[VAL_286]], %[[VAL_290]]
// CHECK: %[[VAL_292:.*]] = fsub float %[[VAL_270]], %[[VAL_291]]
// CHECK: %[[VAL_293:.*]] = fmul float %[[VAL_251]], %[[VAL_292]]
// CHECK: %[[VAL_294:.*]] = fptrunc float %[[VAL_293]] to half
// CHECK: %[[VAL_295:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_2]] to half*
// CHECK: %[[VAL_296:.*]] = getelementptr inbounds half, half* %[[VAL_295]], i32 %[[VAL_59]]
// CHECK: %[[VAL_295:.*]] = bitcast [128 x [112 x [112 x [64 x half]]]]* %[[VAL_29]] to half*
// CHECK: %[[VAL_296:.*]] = getelementptr inbounds half, half* %[[VAL_295]], i32 %[[VAL_61]]
// CHECK: store half %[[VAL_294]], half* %[[VAL_296]], align 2
// CHECK: br label %[[VAL_71]]