--- build.a/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/shell.s 2024-04-01 12:41:02.662358011 +0000 +++ build.b/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/shell.s 2024-04-01 12:41:14.770020356 +0000 @@ -5860,27 +5860,25 @@ call sqlite3_free j .LBB14_10 .LBB14_20: # %vector.ph - vsetvli a0, zero, e32, m1, ta, ma - vmv.v.i v12, 0 - vsetvli zero, zero, e32, m1, tu, ma - vmv.s.x v12, a1 - vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v10, 0 neg a0, a2 + vsetvli a3, zero, e32, m1, ta, ma + vmv.v.i v10, 0 + vsetvli zero, zero, e32, m1, tu, ma + vmv.s.x v10, a1 + vsetvli a1, zero, e32, m2, ta, ma vmv.v.i v8, 0 and a0, s4, a0 - vmv1r.v v8, v12 + vmv1r.v v8, v10 li a1, 34 mv a3, a0 mv a4, s1 .LBB14_21: # %vector.body # =>This Inner Loop Header: Depth=1 - vle8.v v12, (a4) vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vx v0, v12, a1 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vle8.v v10, (a4) + vmseq.vx v0, v10, a1 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a3, a3, a2 add a4, a4, a2 bnez a3, .LBB14_21 @@ -6013,27 +6011,25 @@ li a1, 0 j .LBB14_44 .LBB14_41: # %vector.ph452 - vsetvli a1, zero, e32, m1, ta, ma - vmv.v.i v12, 0 + neg a1, a2 + vsetvli a3, zero, e32, m1, ta, ma + vmv.v.i v10, 0 vsetvli zero, zero, e32, m1, tu, ma - vmv.s.x v12, a0 + vmv.s.x v10, a0 vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v10, 0 - neg a0, a2 vmv.v.i v8, 0 - and a1, s5, a0 - vmv1r.v v8, v12 + and a1, s5, a1 + vmv1r.v v8, v10 li a0, 34 mv a3, a1 mv a4, s1 .LBB14_42: # %vector.body457 # =>This Inner Loop Header: Depth=1 - vle8.v v12, (a4) vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vx v0, v12, a0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vle8.v v10, (a4) + vmseq.vx v0, v10, a0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a3, a3, a2 add a4, a4, a2 bnez a3, .LBB14_42 @@ -6115,27 +6111,25 @@ li a0, 0 j .LBB14_63 .LBB14_60: # %vector.ph466 - vsetvli a0, zero, e32, m1, ta, ma - vmv.v.i v12, 0 - vsetvli zero, zero, e32, m1, tu, ma - vmv.s.x v12, a1 - vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v10, 0 neg a0, a2 + vsetvli a3, zero, e32, m1, ta, ma + vmv.v.i v10, 0 + vsetvli zero, zero, e32, m1, tu, ma + vmv.s.x v10, a1 + vsetvli a1, zero, e32, m2, ta, ma vmv.v.i v8, 0 and a0, s5, a0 - vmv1r.v v8, v12 + vmv1r.v v8, v10 li a1, 39 mv a3, a0 mv a4, s3 .LBB14_61: # %vector.body471 # =>This Inner Loop Header: Depth=1 - vle8.v v12, (a4) vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vx v0, v12, a1 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vle8.v v10, (a4) + vmseq.vx v0, v10, a1 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a3, a3, a2 add a4, a4, a2 bnez a3, .LBB14_61 @@ -6232,13 +6226,14 @@ srli s9, s9, 1 vsetvli a0, zero, e32, m1, ta, ma vmv.v.i v8, 0 - addi a0, sp, 48 - vs1r.v v8, (a0) # Unknown-size Folded Spill - vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v8, 0 csrr a0, vlenb + slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 48 + vs1r.v v8, (a0) # Unknown-size Folded Spill + vsetvli a0, zero, e32, m2, ta, ma + vmv.v.i v8, 0 + addi a0, sp, 48 vs2r.v v8, (a0) # Unknown-size Folded Spill li s10, 34 li s7, 113 @@ -6304,10 +6299,6 @@ add a1, s5, s4 addiw a1, a1, 3 and s5, s5, s6 - csrr a0, vlenb - add a0, sp, a0 - addi a0, a0, 48 - vl2r.v v12, (a0) # Unknown-size Folded Reload blez s11, .LBB14_94 # %bb.87: # %for.body.preheader.i261 # in Loop: Header=BB14_80 Depth=1 @@ -6320,10 +6311,14 @@ neg a0, s9 and a0, s5, a0 vsetvli a2, zero, e32, m1, tu, ma - addi a2, sp, 48 + csrr a2, vlenb + slli a2, a2, 1 + add a2, sp, a2 + addi a2, a2, 48 vl1r.v v10, (a2) # Unknown-size Folded Reload vmv.s.x v10, a1 - vmv2r.v v8, v12 + addi a1, sp, 48 + vl2r.v v8, (a1) # Unknown-size Folded Reload vmv1r.v v8, v10 mv a1, a0 mv a2, s2 @@ -6333,9 +6328,8 @@ vsetvli a3, zero, e8, mf2, ta, ma vle8.v v10, (a2) vmseq.vx v0, v10, s10 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v10, v12, 1, v0 - vadd.vv v8, v8, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a1, a1, s9 add a2, a2, s9 bnez a1, .LBB14_90 @@ -6549,27 +6543,25 @@ li a0, 0 j .LBB14_131 .LBB14_128: # %vector.ph494 - vsetvli a0, zero, e32, m1, ta, ma - vmv.v.i v12, 0 - vsetvli zero, zero, e32, m1, tu, ma - vmv.s.x v12, a1 - vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v10, 0 neg a0, a2 + vsetvli a3, zero, e32, m1, ta, ma + vmv.v.i v10, 0 + vsetvli zero, zero, e32, m1, tu, ma + vmv.s.x v10, a1 + vsetvli a1, zero, e32, m2, ta, ma vmv.v.i v8, 0 and a0, s4, a0 - vmv1r.v v8, v12 + vmv1r.v v8, v10 li a1, 34 mv a3, a0 mv a4, s1 .LBB14_129: # %vector.body499 # =>This Inner Loop Header: Depth=1 - vle8.v v12, (a4) vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vx v0, v12, a1 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vle8.v v10, (a4) + vmseq.vx v0, v10, a1 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a3, a3, a2 add a4, a4, a2 bnez a3, .LBB14_129 @@ -6725,25 +6717,23 @@ j .LBB15_20 .LBB15_8: # %vector.ph neg a0, a2 + and a0, s4, a0 vsetvli a3, zero, e32, m1, ta, ma - vmv.v.i v12, 0 + vmv.v.i v10, 0 vsetvli zero, zero, e32, m1, tu, ma - vmv.s.x v12, a1 + vmv.s.x v10, a1 vsetvli a1, zero, e32, m2, ta, ma - vmv.v.i v10, 0 - and a0, s4, a0 vmv.v.i v8, 0 - vmv1r.v v8, v12 + vmv1r.v v8, v10 mv a1, a0 mv a3, s0 .LBB15_9: # %vector.body # =>This Inner Loop Header: Depth=1 - vle8.v v12, (a3) vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vx v0, v12, s1 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vle8.v v10, (a3) + vmseq.vx v0, v10, s1 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a1, a1, a2 add a3, a3, a2 bnez a1, .LBB15_9 --- build.a/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/pairlocalalign.s 2024-04-01 12:41:02.846352880 +0000 +++ build.b/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/pairlocalalign.s 2024-04-01 12:41:14.954015225 +0000 @@ -1026,21 +1026,19 @@ vmv.v.i v8, 0 li a6, 45 mv a7, a4 - vmv.v.i v10, 0 .LBB1_5: # %vector.body # =>This Inner Loop Header: Depth=1 - vle8.v v12, (a0) vsetvli zero, zero, e8, mf2, ta, ma - vmsne.vx v0, v12, a6 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vle8.v v10, (a0) + vmsne.vx v0, v10, a6 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a7, a7, a5 add a0, a0, a5 bnez a7, .LBB1_5 # %bb.6: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a0, v8 beq a4, a3, .LBB1_9 .LBB1_7: # %while.body.preheader8 --- build.a/MultiSource/Applications/SIBsim4/CMakeFiles/SIBsim4.dir/sim4b1.s 2024-04-01 12:41:02.626359015 +0000 +++ build.b/MultiSource/Applications/SIBsim4/CMakeFiles/SIBsim4.dir/sim4b1.s 2024-04-01 12:41:14.730021472 +0000 @@ -63,7 +63,7 @@ mv s11, a0 lw a0, 16(a0) bnez a0, .LBB0_1 - j .LBB0_679 + j .LBB0_678 .LBB0_1: # %lor.lhs.false mv s10, a1 lui a0, 1 @@ -71,37 +71,37 @@ add a1, a1, a0 lw a0, 0(a1) bnez a0, .LBB0_2 - j .LBB0_679 + j .LBB0_678 .LBB0_2: # %if.end - mv s7, a2 + mv s0, a2 li a0, 5 slli a0, a0, 32 sd a0, 544(sp) li a0, 40 - mv s0, a1 + mv s1, a1 call xmalloc .Lpcrel_hi0: auipc a1, %got_pcrel_hi(options) ld s9, %pcrel_lo(.Lpcrel_hi0)(a1) ld a1, 16(s10) - sd s0, 160(sp) # 8-byte Folded Spill - lw a2, 0(s0) + sd s1, 160(sp) # 8-byte Folded Spill + lw a2, 0(s1) lw a5, 40(s9) sd a0, 536(sp) li a3, 1 li a4, 1 addi a6, sp, 536 mv a0, s11 - mv a7, s7 + mv a7, s0 li t2, 0 call exon_cores vsetivli zero, 2, e64, m1, ta, ma - lw a0, 8(s7) + lw a0, 8(s0) vmv.v.i v8, 0 addi a1, sp, 512 vse64.v v8, (a1) bnez a0, .LBB0_3 - j .LBB0_678 + j .LBB0_677 .LBB0_3: # %for.body.lr.ph li a1, 0 lui a0, 4096 @@ -109,7 +109,7 @@ sd a0, 128(sp) # 8-byte Folded Spill lui a0, 1024 addi a0, a0, -1 - sd a0, 72(sp) # 8-byte Folded Spill + sd a0, 64(sp) # 8-byte Folded Spill csrr a0, vlenb slli a2, a0, 1 sd a2, 56(sp) # 8-byte Folded Spill @@ -119,8 +119,7 @@ li s6, 2 .Lpcrel_hi1: auipc a0, %got_pcrel_hi(free) - ld a0, %pcrel_lo(.Lpcrel_hi1)(a0) - sd a0, 216(sp) # 8-byte Folded Spill + ld s7, %pcrel_lo(.Lpcrel_hi1)(a0) lui a0, 244 addiw a0, a0, 575 sd a0, 144(sp) # 8-byte Folded Spill @@ -147,99 +146,101 @@ sd a0, 136(sp) # 8-byte Folded Spill sd s11, 248(sp) # 8-byte Folded Spill sd s10, 264(sp) # 8-byte Folded Spill - sd s9, 168(sp) # 8-byte Folded Spill - sd s7, 64(sp) # 8-byte Folded Spill + sd s9, 200(sp) # 8-byte Folded Spill + sd s0, 88(sp) # 8-byte Folded Spill + sd s7, 216(sp) # 8-byte Folded Spill j .LBB0_6 .LBB0_4: # %free_align.exit # in Loop: Header=BB0_6 Depth=1 sd zero, 0(s8) .LBB0_5: # %cleanup599 # in Loop: Header=BB0_6 Depth=1 - lwu a0, 8(s7) + ld s0, 88(sp) # 8-byte Folded Reload + lwu a0, 8(s0) ld a1, 152(sp) # 8-byte Folded Reload addi a1, a1, 1 bltu a1, a0, .LBB0_6 - j .LBB0_678 + j .LBB0_677 .LBB0_6: # %for.body # =>This Loop Header: Depth=1 # Child Loop BB0_14 Depth 2 # Child Loop BB0_17 Depth 2 # Child Loop BB0_22 Depth 2 - # Child Loop BB0_29 Depth 2 - # Child Loop BB0_34 Depth 2 - # Child Loop BB0_44 Depth 2 - # Child Loop BB0_55 Depth 2 - # Child Loop BB0_58 Depth 2 - # Child Loop BB0_61 Depth 2 - # Child Loop BB0_64 Depth 2 - # Child Loop BB0_66 Depth 2 - # Child Loop BB0_82 Depth 3 - # Child Loop BB0_95 Depth 4 - # Child Loop BB0_104 Depth 3 - # Child Loop BB0_112 Depth 3 - # Child Loop BB0_114 Depth 3 - # Child Loop BB0_121 Depth 2 - # Child Loop BB0_134 Depth 2 - # Child Loop BB0_137 Depth 2 - # Child Loop BB0_141 Depth 2 - # Child Loop BB0_149 Depth 2 - # Child Loop BB0_155 Depth 2 - # Child Loop BB0_168 Depth 2 - # Child Loop BB0_179 Depth 2 - # Child Loop BB0_194 Depth 3 - # Child Loop BB0_210 Depth 4 - # Child Loop BB0_221 Depth 3 - # Child Loop BB0_228 Depth 3 - # Child Loop BB0_230 Depth 3 - # Child Loop BB0_674 Depth 2 - # Child Loop BB0_242 Depth 2 - # Child Loop BB0_265 Depth 3 - # Child Loop BB0_299 Depth 3 - # Child Loop BB0_302 Depth 3 - # Child Loop BB0_307 Depth 3 - # Child Loop BB0_437 Depth 3 - # Child Loop BB0_320 Depth 3 - # Child Loop BB0_329 Depth 3 - # Child Loop BB0_332 Depth 3 - # Child Loop BB0_336 Depth 3 - # Child Loop BB0_341 Depth 4 - # Child Loop BB0_353 Depth 5 - # Child Loop BB0_358 Depth 4 - # Child Loop BB0_363 Depth 4 - # Child Loop BB0_369 Depth 4 - # Child Loop BB0_385 Depth 5 - # Child Loop BB0_392 Depth 4 - # Child Loop BB0_397 Depth 4 - # Child Loop BB0_425 Depth 3 - # Child Loop BB0_433 Depth 3 - # Child Loop BB0_250 Depth 3 - # Child Loop BB0_255 Depth 3 - # Child Loop BB0_278 Depth 3 - # Child Loop BB0_443 Depth 2 - # Child Loop BB0_455 Depth 2 - # Child Loop BB0_468 Depth 2 - # Child Loop BB0_475 Depth 2 - # Child Loop BB0_481 Depth 2 - # Child Loop BB0_485 Depth 3 - # Child Loop BB0_504 Depth 3 - # Child Loop BB0_498 Depth 3 - # Child Loop BB0_521 Depth 2 - # Child Loop BB0_526 Depth 3 - # Child Loop BB0_529 Depth 4 - # Child Loop BB0_542 Depth 4 - # Child Loop BB0_554 Depth 4 - # Child Loop BB0_563 Depth 3 - # Child Loop BB0_571 Depth 2 - # Child Loop BB0_578 Depth 3 - # Child Loop BB0_580 Depth 4 - # Child Loop BB0_601 Depth 2 - # Child Loop BB0_609 Depth 3 - # Child Loop BB0_618 Depth 2 - # Child Loop BB0_647 Depth 3 - # Child Loop BB0_655 Depth 4 - # Child Loop BB0_658 Depth 4 - # Child Loop BB0_671 Depth 2 - ld a0, 0(s7) + # Child Loop BB0_28 Depth 2 + # Child Loop BB0_33 Depth 2 + # Child Loop BB0_43 Depth 2 + # Child Loop BB0_54 Depth 2 + # Child Loop BB0_57 Depth 2 + # Child Loop BB0_60 Depth 2 + # Child Loop BB0_63 Depth 2 + # Child Loop BB0_65 Depth 2 + # Child Loop BB0_81 Depth 3 + # Child Loop BB0_94 Depth 4 + # Child Loop BB0_103 Depth 3 + # Child Loop BB0_111 Depth 3 + # Child Loop BB0_113 Depth 3 + # Child Loop BB0_120 Depth 2 + # Child Loop BB0_133 Depth 2 + # Child Loop BB0_136 Depth 2 + # Child Loop BB0_140 Depth 2 + # Child Loop BB0_148 Depth 2 + # Child Loop BB0_154 Depth 2 + # Child Loop BB0_167 Depth 2 + # Child Loop BB0_178 Depth 2 + # Child Loop BB0_193 Depth 3 + # Child Loop BB0_209 Depth 4 + # Child Loop BB0_220 Depth 3 + # Child Loop BB0_227 Depth 3 + # Child Loop BB0_229 Depth 3 + # Child Loop BB0_673 Depth 2 + # Child Loop BB0_241 Depth 2 + # Child Loop BB0_264 Depth 3 + # Child Loop BB0_298 Depth 3 + # Child Loop BB0_301 Depth 3 + # Child Loop BB0_306 Depth 3 + # Child Loop BB0_436 Depth 3 + # Child Loop BB0_319 Depth 3 + # Child Loop BB0_328 Depth 3 + # Child Loop BB0_331 Depth 3 + # Child Loop BB0_335 Depth 3 + # Child Loop BB0_340 Depth 4 + # Child Loop BB0_352 Depth 5 + # Child Loop BB0_357 Depth 4 + # Child Loop BB0_362 Depth 4 + # Child Loop BB0_368 Depth 4 + # Child Loop BB0_384 Depth 5 + # Child Loop BB0_391 Depth 4 + # Child Loop BB0_396 Depth 4 + # Child Loop BB0_424 Depth 3 + # Child Loop BB0_432 Depth 3 + # Child Loop BB0_249 Depth 3 + # Child Loop BB0_254 Depth 3 + # Child Loop BB0_277 Depth 3 + # Child Loop BB0_442 Depth 2 + # Child Loop BB0_454 Depth 2 + # Child Loop BB0_467 Depth 2 + # Child Loop BB0_474 Depth 2 + # Child Loop BB0_480 Depth 2 + # Child Loop BB0_484 Depth 3 + # Child Loop BB0_503 Depth 3 + # Child Loop BB0_497 Depth 3 + # Child Loop BB0_520 Depth 2 + # Child Loop BB0_525 Depth 3 + # Child Loop BB0_528 Depth 4 + # Child Loop BB0_541 Depth 4 + # Child Loop BB0_553 Depth 4 + # Child Loop BB0_562 Depth 3 + # Child Loop BB0_570 Depth 2 + # Child Loop BB0_577 Depth 3 + # Child Loop BB0_579 Depth 4 + # Child Loop BB0_600 Depth 2 + # Child Loop BB0_608 Depth 3 + # Child Loop BB0_617 Depth 2 + # Child Loop BB0_646 Depth 3 + # Child Loop BB0_654 Depth 4 + # Child Loop BB0_657 Depth 4 + # Child Loop BB0_670 Depth 2 + ld a0, 0(s0) sd a1, 152(sp) # 8-byte Folded Spill slli a1, a1, 3 add a0, a0, a1 @@ -259,13 +260,13 @@ li t2, -1 sd s8, 392(sp) # 8-byte Folded Spill sd s5, 440(sp) # 8-byte Folded Spill - bnez a0, .LBB0_127 + bnez a0, .LBB0_126 # %bb.8: # %land.lhs.true # in Loop: Header=BB0_6 Depth=1 ld a0, 0(s5) ld s3, 0(a0) lwu a0, 4(s3) - bltu a0, s6, .LBB0_127 + bltu a0, s6, .LBB0_126 # %bb.9: # %if.then17 # in Loop: Header=BB0_6 Depth=1 addiw a1, a0, -62 @@ -276,12 +277,12 @@ lw a1, 0(s3) lw a2, 24(s8) addiw a1, a1, -1 + ld s7, 216(sp) # 8-byte Folded Reload bgeu a2, a1, .LBB0_27 # %bb.11: # %if.then26 # in Loop: Header=BB0_6 Depth=1 lw a1, 36(s11) li a2, 10 - ld s7, 64(sp) # 8-byte Folded Reload bltu a1, a2, .LBB0_13 # %bb.12: # %if.then26 # in Loop: Header=BB0_6 Depth=1 @@ -328,12 +329,11 @@ lui s1, 1024 add s1, s0, s1 mv s2, s0 - ld s4, 216(sp) # 8-byte Folded Reload .LBB0_14: # %for.body.i # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld a0, 0(s2) - mv a1, s4 + mv a1, s7 call tdestroy addi s2, s2, 8 bne s2, s1, .LBB0_14 @@ -433,18 +433,15 @@ # in Loop: Header=BB0_6 Depth=1 lw s2, 16(s8) li t2, -1 - bnez s2, .LBB0_28 - j .LBB0_37 -.LBB0_27: # in Loop: Header=BB0_6 Depth=1 - ld s7, 64(sp) # 8-byte Folded Reload -.LBB0_28: # %land.rhs.lr.ph + beqz s2, .LBB0_36 +.LBB0_27: # %land.rhs.lr.ph # in Loop: Header=BB0_6 Depth=1 ld s4, 0(s5) ld s0, 16(s10) li s1, 0 slli a0, s2, 32 srli s5, a0, 32 -.LBB0_29: # %land.rhs +.LBB0_28: # %land.rhs # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld a0, 0(s4) @@ -452,27 +449,27 @@ lw a0, 4(a0) mv a2, s0 call is_polyAT_exon_p - beqz a0, .LBB0_32 -# %bb.30: # %while.body - # in Loop: Header=BB0_29 Depth=2 + beqz a0, .LBB0_31 +# %bb.29: # %while.body + # in Loop: Header=BB0_28 Depth=2 addiw s1, s1, 1 addi s5, s5, -1 addi s4, s4, 8 - bnez s5, .LBB0_29 -# %bb.31: # in Loop: Header=BB0_6 Depth=1 + bnez s5, .LBB0_28 +# %bb.30: # in Loop: Header=BB0_6 Depth=1 mv s1, s2 -.LBB0_32: # %while.end +.LBB0_31: # %while.end # in Loop: Header=BB0_6 Depth=1 li t2, -1 ld s5, 440(sp) # 8-byte Folded Reload - beqz s1, .LBB0_37 -# %bb.33: # %for.body69.preheader + beqz s1, .LBB0_36 +# %bb.32: # %for.body69.preheader # in Loop: Header=BB0_6 Depth=1 li s2, 0 slli a0, s1, 32 srli s0, a0, 32 srli s3, a0, 29 -.LBB0_34: # %for.body69 +.LBB0_33: # %for.body69 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld a0, 0(s5) @@ -480,8 +477,8 @@ ld a0, 0(a0) call free addi s2, s2, 8 - bne s3, s2, .LBB0_34 -# %bb.35: # %for.end + bne s3, s2, .LBB0_33 +# %bb.34: # %for.end # in Loop: Header=BB0_6 Depth=1 ld a0, 8(s8) lw a2, 16(s8) @@ -495,34 +492,34 @@ subw a1, a0, s1 sw a1, 16(s8) beq a0, s1, .LBB0_5 -# %bb.36: # %cleanup +# %bb.35: # %cleanup # in Loop: Header=BB0_6 Depth=1 ld a0, 0(s5) ld s3, 0(a0) li t2, -1 -.LBB0_37: # %if.end88 +.LBB0_36: # %if.end88 # in Loop: Header=BB0_6 Depth=1 lwu a0, 4(s3) addiw s7, a0, -1 - beqz s7, .LBB0_127 -# %bb.38: # %if.then93 + beqz s7, .LBB0_126 +# %bb.37: # %if.then93 # in Loop: Header=BB0_6 Depth=1 li a1, 250 li t0, 1 - bltu s7, a1, .LBB0_40 -# %bb.39: # %if.then93 + bltu s7, a1, .LBB0_39 +# %bb.38: # %if.then93 # in Loop: Header=BB0_6 Depth=1 li s7, 250 -.LBB0_40: # %if.then93 +.LBB0_39: # %if.then93 # in Loop: Header=BB0_6 Depth=1 li t1, 2 lw a1, 0(s3) slli a2, s7, 2 - blt a2, a1, .LBB0_42 -# %bb.41: # %if.then93 + blt a2, a1, .LBB0_41 +# %bb.40: # %if.then93 # in Loop: Header=BB0_6 Depth=1 addi a2, a1, -1 -.LBB0_42: # %if.then93 +.LBB0_41: # %if.then93 # in Loop: Header=BB0_6 Depth=1 ld a3, 16(s10) sext.w s10, a2 @@ -539,12 +536,12 @@ addi s8, a5, -1 mv s6, s7 mv a4, s10 - blez s10, .LBB0_48 -# %bb.43: # %land.rhs.i296.preheader + blez s10, .LBB0_47 +# %bb.42: # %land.rhs.i296.preheader # in Loop: Header=BB0_6 Depth=1 mv a5, s10 mv s6, s7 -.LBB0_44: # %land.rhs.i296 +.LBB0_43: # %land.rhs.i296 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 mv a6, s6 @@ -552,30 +549,30 @@ lbu a4, 0(a4) add a7, s8, a5 lbu a7, 0(a7) - bne a4, a7, .LBB0_47 -# %bb.45: # %for.inc.i - # in Loop: Header=BB0_44 Depth=2 + bne a4, a7, .LBB0_46 +# %bb.44: # %for.inc.i + # in Loop: Header=BB0_43 Depth=2 addiw s6, a6, -1 addiw a4, a5, -1 - blt a5, t1, .LBB0_48 -# %bb.46: # %for.inc.i - # in Loop: Header=BB0_44 Depth=2 + blt a5, t1, .LBB0_47 +# %bb.45: # %for.inc.i + # in Loop: Header=BB0_43 Depth=2 mv a5, a4 - blt t0, a6, .LBB0_44 - j .LBB0_48 -.LBB0_47: # in Loop: Header=BB0_6 Depth=1 + blt t0, a6, .LBB0_43 + j .LBB0_47 +.LBB0_46: # in Loop: Header=BB0_6 Depth=1 mv s6, a6 mv a4, a5 -.LBB0_48: # %for.end.i +.LBB0_47: # %for.end.i # in Loop: Header=BB0_6 Depth=1 add a5, a0, a3 not a0, a2 add a6, a1, a0 - beqz s6, .LBB0_53 -# %bb.49: # %for.end.i + beqz s6, .LBB0_52 +# %bb.48: # %for.end.i # in Loop: Header=BB0_6 Depth=1 - beqz a4, .LBB0_53 -# %bb.50: # %if.end.i + beqz a4, .LBB0_52 +# %bb.49: # %if.end.i # in Loop: Header=BB0_6 Depth=1 sd a6, 400(sp) # 8-byte Folded Spill sd a5, 408(sp) # 8-byte Folded Spill @@ -591,18 +588,18 @@ mv a0, s0 call xmalloc sd a0, 464(sp) # 8-byte Folded Spill - bltz s2, .LBB0_59 -# %bb.51: # %for.body28.preheader.i + bltz s2, .LBB0_58 +# %bb.50: # %for.body28.preheader.i # in Loop: Header=BB0_6 Depth=1 slli a0, s1, 32 srli a0, a0, 32 ld a2, 312(sp) # 8-byte Folded Reload srli a1, a2, 1 - bgeu a0, a1, .LBB0_54 -# %bb.52: # in Loop: Header=BB0_6 Depth=1 + bgeu a0, a1, .LBB0_53 +# %bb.51: # in Loop: Header=BB0_6 Depth=1 li a1, 0 - j .LBB0_57 -.LBB0_53: # %if.then.i + j .LBB0_56 +.LBB0_52: # %if.then.i # in Loop: Header=BB0_6 Depth=1 li s2, 0 add a5, s6, a5 @@ -611,8 +608,8 @@ ld s10, 264(sp) # 8-byte Folded Reload li s6, 2 ld s8, 392(sp) # 8-byte Folded Reload - j .LBB0_125 -.LBB0_54: # %vector.ph1124 + j .LBB0_124 +.LBB0_53: # %vector.ph1124 # in Loop: Header=BB0_6 Depth=1 srli a1, a2, 3 slli a1, a1, 2 @@ -624,30 +621,30 @@ ld a3, 456(sp) # 8-byte Folded Reload ld a4, 56(sp) # 8-byte Folded Reload ld a5, 224(sp) # 8-byte Folded Reload -.LBB0_55: # %vector.body1129 +.LBB0_54: # %vector.body1129 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 vs2r.v v8, (a3) sub a2, a2, a5 add a3, a3, a4 - bnez a2, .LBB0_55 -# %bb.56: # %middle.block1121 + bnez a2, .LBB0_54 +# %bb.55: # %middle.block1121 # in Loop: Header=BB0_6 Depth=1 - beq a1, a0, .LBB0_59 -.LBB0_57: # %for.body28.i.preheader + beq a1, a0, .LBB0_58 +.LBB0_56: # %for.body28.i.preheader # in Loop: Header=BB0_6 Depth=1 slli a1, a1, 2 ld a2, 456(sp) # 8-byte Folded Reload add a1, a2, a1 slli a0, a0, 2 add a0, a2, a0 -.LBB0_58: # %for.body28.i +.LBB0_57: # %for.body28.i # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 sw s9, 0(a1) addi a1, a1, 4 - bne a1, a0, .LBB0_58 -.LBB0_59: # %for.end33.i + bne a1, a0, .LBB0_57 +.LBB0_58: # %for.end33.i # in Loop: Header=BB0_6 Depth=1 slli a0, s10, 2 ld a1, 456(sp) # 8-byte Folded Reload @@ -667,8 +664,8 @@ ld a4, 56(sp) # 8-byte Folded Reload li s6, 2 sd a1, 336(sp) # 8-byte Folded Spill - bltu s7, a1, .LBB0_63 -# %bb.60: # %vector.ph1109 + bltu s7, a1, .LBB0_62 +# %bb.59: # %vector.ph1109 # in Loop: Header=BB0_6 Depth=1 ld a0, 312(sp) # 8-byte Folded Reload srli a0, a0, 3 @@ -681,30 +678,30 @@ addi a2, s0, 4 mv a3, a1 ld a6, 224(sp) # 8-byte Folded Reload -.LBB0_61: # %vector.body1115 +.LBB0_60: # %vector.body1115 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 vs2r.v v8, (a2) sub a3, a3, a6 add a2, a2, a4 - bnez a3, .LBB0_61 -# %bb.62: # %middle.block1106 + bnez a3, .LBB0_60 +# %bb.61: # %middle.block1106 # in Loop: Header=BB0_6 Depth=1 - beq a1, s7, .LBB0_65 -.LBB0_63: # %for.body52.i.preheader + beq a1, s7, .LBB0_64 +.LBB0_62: # %for.body52.i.preheader # in Loop: Header=BB0_6 Depth=1 slli a0, a0, 2 add a0, s0, a0 slli a1, s7, 2 add a1, s0, a1 addi a1, a1, 4 -.LBB0_64: # %for.body52.i +.LBB0_63: # %for.body52.i # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 sw s9, 0(a0) addi a0, a0, 4 - bne a0, a1, .LBB0_64 -.LBB0_65: # %for.end58.i + bne a0, a1, .LBB0_63 +.LBB0_64: # %for.end58.i # in Loop: Header=BB0_6 Depth=1 lw a0, 0(s1) li a1, 0 @@ -735,53 +732,53 @@ sd s1, 344(sp) # 8-byte Folded Spill sd t1, 328(sp) # 8-byte Folded Spill sd t2, 320(sp) # 8-byte Folded Spill -.LBB0_66: # %land.rhs68.i +.LBB0_65: # %land.rhs68.i # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_82 Depth 3 - # Child Loop BB0_95 Depth 4 - # Child Loop BB0_104 Depth 3 - # Child Loop BB0_112 Depth 3 - # Child Loop BB0_114 Depth 3 + # Child Loop BB0_81 Depth 3 + # Child Loop BB0_94 Depth 4 + # Child Loop BB0_103 Depth 3 + # Child Loop BB0_111 Depth 3 + # Child Loop BB0_113 Depth 3 slli a0, a1, 2 add a0, s0, a0 lw a0, 0(a0) mv t5, s4 subw a0, s7, a0 li a2, 2 - bge t1, a0, .LBB0_69 -# %bb.67: # %if.else.i.i - # in Loop: Header=BB0_66 Depth=2 - bge a0, t2, .LBB0_74 -# %bb.68: # %if.then2.i.i - # in Loop: Header=BB0_66 Depth=2 - ld a0, 168(sp) # 8-byte Folded Reload + bge t1, a0, .LBB0_68 +# %bb.66: # %if.else.i.i + # in Loop: Header=BB0_65 Depth=2 + bge a0, t2, .LBB0_73 +# %bb.67: # %if.then2.i.i + # in Loop: Header=BB0_65 Depth=2 + ld a0, 200(sp) # 8-byte Folded Reload lw a2, 28(a0) -.LBB0_69: # %good_ratio.exit.i - # in Loop: Header=BB0_66 Depth=2 - bge a2, a1, .LBB0_77 -.LBB0_70: # %lor.rhs.i - # in Loop: Header=BB0_66 Depth=2 - beqz a1, .LBB0_119 -# %bb.71: # %land.rhs79.i - # in Loop: Header=BB0_66 Depth=2 +.LBB0_68: # %good_ratio.exit.i + # in Loop: Header=BB0_65 Depth=2 + bge a2, a1, .LBB0_76 +.LBB0_69: # %lor.rhs.i + # in Loop: Header=BB0_65 Depth=2 + beqz a1, .LBB0_118 +# %bb.70: # %land.rhs79.i + # in Loop: Header=BB0_65 Depth=2 addi a0, a1, -1 slli a2, a0, 2 add a2, s0, a2 lw a2, 0(a2) subw a2, s7, a2 li a3, 2 - bge t1, a2, .LBB0_76 -# %bb.72: # %if.else.i199.i - # in Loop: Header=BB0_66 Depth=2 - bge a2, t2, .LBB0_75 -# %bb.73: # %if.then2.i205.i - # in Loop: Header=BB0_66 Depth=2 - ld a2, 168(sp) # 8-byte Folded Reload + bge t1, a2, .LBB0_75 +# %bb.71: # %if.else.i199.i + # in Loop: Header=BB0_65 Depth=2 + bge a2, t2, .LBB0_74 +# %bb.72: # %if.then2.i205.i + # in Loop: Header=BB0_65 Depth=2 + ld a2, 200(sp) # 8-byte Folded Reload lw a3, 28(a2) - j .LBB0_76 -.LBB0_74: # %if.else3.i.i - # in Loop: Header=BB0_66 Depth=2 + j .LBB0_75 +.LBB0_73: # %if.else3.i.i + # in Loop: Header=BB0_65 Depth=2 .Lpcrel_hi2: auipc a2, %pcrel_hi(.LCPI0_0) fld fa5, %pcrel_lo(.Lpcrel_hi2)(a2) @@ -791,10 +788,10 @@ fcvt.d.w fa3, a0 fmadd.d fa5, fa3, fa4, fa5 fcvt.w.d a2, fa5, rtz - blt a2, a1, .LBB0_70 - j .LBB0_77 -.LBB0_75: # %if.else3.i202.i - # in Loop: Header=BB0_66 Depth=2 + blt a2, a1, .LBB0_69 + j .LBB0_76 +.LBB0_74: # %if.else3.i202.i + # in Loop: Header=BB0_65 Depth=2 .Lpcrel_hi4: auipc a3, %pcrel_hi(.LCPI0_0) fld fa5, %pcrel_lo(.Lpcrel_hi4)(a3) @@ -804,26 +801,26 @@ fcvt.d.w fa3, a2 fmadd.d fa5, fa3, fa4, fa5 fcvt.w.d a3, fa5, rtz -.LBB0_76: # %good_ratio.exit206.i - # in Loop: Header=BB0_66 Depth=2 - blt a3, a0, .LBB0_119 -.LBB0_77: # %while.body.i295 - # in Loop: Header=BB0_66 Depth=2 +.LBB0_75: # %good_ratio.exit206.i + # in Loop: Header=BB0_65 Depth=2 + blt a3, a0, .LBB0_118 +.LBB0_76: # %while.body.i295 + # in Loop: Header=BB0_65 Depth=2 addi s4, t5, 1 addi t3, t3, -1 addiw t6, t6, -1 - bge s4, t3, .LBB0_79 -# %bb.78: # %for.end216.thread.i - # in Loop: Header=BB0_66 Depth=2 + bge s4, t3, .LBB0_78 +# %bb.77: # %for.end216.thread.i + # in Loop: Header=BB0_65 Depth=2 lw a0, 0(s1) slli a2, s5, 2 add a3, s0, a2 sw a0, 0(a3) add a2, t4, a2 sw s10, 0(a2) - j .LBB0_115 -.LBB0_79: # %for.body93.lr.ph.i - # in Loop: Header=BB0_66 Depth=2 + j .LBB0_114 +.LBB0_78: # %for.body93.lr.ph.i + # in Loop: Header=BB0_65 Depth=2 sd s4, 368(sp) # 8-byte Folded Spill sd t3, 376(sp) # 8-byte Folded Spill li s0, 2 @@ -853,39 +850,39 @@ add a2, t1, a2 mv t1, a7 sd t2, 360(sp) # 8-byte Folded Spill - j .LBB0_82 -.LBB0_80: # %while.end.thread.i - # in Loop: Header=BB0_82 Depth=3 + j .LBB0_81 +.LBB0_79: # %while.end.thread.i + # in Loop: Header=BB0_81 Depth=3 slli t2, s1, 2 ld s4, 464(sp) # 8-byte Folded Reload add t2, s4, t2 sw t3, 0(t2) -.LBB0_81: # %for.inc214.i - # in Loop: Header=BB0_82 Depth=3 +.LBB0_80: # %for.inc214.i + # in Loop: Header=BB0_81 Depth=3 addi t2, s1, 1 addi t1, t1, 1 - blt t5, s1, .LBB0_102 -.LBB0_82: # %for.body93.i + blt t5, s1, .LBB0_101 +.LBB0_81: # %for.body93.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_66 Depth=2 + # Parent Loop BB0_65 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_95 Depth 4 + # Child Loop BB0_94 Depth 4 mv s1, t2 - bne t2, a0, .LBB0_84 -# %bb.83: # %if.then99.i - # in Loop: Header=BB0_82 Depth=3 + bne t2, a0, .LBB0_83 +# %bb.82: # %if.then99.i + # in Loop: Header=BB0_81 Depth=3 lw s4, 0(a2) - j .LBB0_93 -.LBB0_84: # %if.else.i - # in Loop: Header=BB0_82 Depth=3 - bne s1, a3, .LBB0_86 -# %bb.85: # %if.then107.i - # in Loop: Header=BB0_82 Depth=3 + j .LBB0_92 +.LBB0_83: # %if.else.i + # in Loop: Header=BB0_81 Depth=3 + bne s1, a3, .LBB0_85 +# %bb.84: # %if.then107.i + # in Loop: Header=BB0_81 Depth=3 lw s4, 0(a4) addiw s4, s4, -1 - j .LBB0_93 -.LBB0_86: # %if.else112.i - # in Loop: Header=BB0_82 Depth=3 + j .LBB0_92 +.LBB0_85: # %if.else112.i + # in Loop: Header=BB0_81 Depth=3 slli t2, s1, 2 ld t3, 456(sp) # 8-byte Folded Reload add t2, t3, t2 @@ -893,38 +890,38 @@ lw s6, 4(t2) lw t2, -4(t2) addiw s4, t3, -1 - blt s6, s4, .LBB0_88 -# %bb.87: # %if.else112.i - # in Loop: Header=BB0_82 Depth=3 - bge t2, t3, .LBB0_93 -.LBB0_88: # %if.else135.i - # in Loop: Header=BB0_82 Depth=3 + blt s6, s4, .LBB0_87 +# %bb.86: # %if.else112.i + # in Loop: Header=BB0_81 Depth=3 + bge t2, t3, .LBB0_92 +.LBB0_87: # %if.else135.i + # in Loop: Header=BB0_81 Depth=3 addiw s4, t2, -1 - blt s4, s6, .LBB0_90 -# %bb.89: # %if.else135.i - # in Loop: Header=BB0_82 Depth=3 + blt s4, s6, .LBB0_89 +# %bb.88: # %if.else135.i + # in Loop: Header=BB0_81 Depth=3 mv s4, s6 -.LBB0_90: # %if.else135.i - # in Loop: Header=BB0_82 Depth=3 - blt t3, t2, .LBB0_92 -# %bb.91: # %if.else135.i - # in Loop: Header=BB0_82 Depth=3 +.LBB0_89: # %if.else135.i + # in Loop: Header=BB0_81 Depth=3 + blt t3, t2, .LBB0_91 +# %bb.90: # %if.else135.i + # in Loop: Header=BB0_81 Depth=3 mv s6, s4 -.LBB0_92: # %if.else135.i - # in Loop: Header=BB0_82 Depth=3 +.LBB0_91: # %if.else135.i + # in Loop: Header=BB0_81 Depth=3 mv s4, s6 -.LBB0_93: # %if.end167.i - # in Loop: Header=BB0_82 Depth=3 +.LBB0_92: # %if.end167.i + # in Loop: Header=BB0_81 Depth=3 subw t2, s1, s7 addw s6, s4, t2 - blez s4, .LBB0_99 -# %bb.94: # %if.end167.i - # in Loop: Header=BB0_82 Depth=3 - blez s6, .LBB0_99 -.LBB0_95: # %land.rhs176.i + blez s4, .LBB0_98 +# %bb.93: # %if.end167.i + # in Loop: Header=BB0_81 Depth=3 + blez s6, .LBB0_98 +.LBB0_94: # %land.rhs176.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_66 Depth=2 - # Parent Loop BB0_82 Depth=3 + # Parent Loop BB0_65 Depth=2 + # Parent Loop BB0_81 Depth=3 # => This Inner Loop Header: Depth=4 mv t3, s4 addw t2, t1, s4 @@ -932,34 +929,34 @@ lbu s4, 0(s4) add s6, s8, t2 lbu s6, 0(s6) - bne s4, s6, .LBB0_80 -# %bb.96: # %while.body188.i - # in Loop: Header=BB0_95 Depth=4 + bne s4, s6, .LBB0_79 +# %bb.95: # %while.body188.i + # in Loop: Header=BB0_94 Depth=4 addiw s4, t3, -1 - blt t2, s0, .LBB0_98 -# %bb.97: # %while.body188.i - # in Loop: Header=BB0_95 Depth=4 - blt t4, t3, .LBB0_95 -.LBB0_98: # %while.end.i.loopexit - # in Loop: Header=BB0_82 Depth=3 + blt t2, s0, .LBB0_97 +# %bb.96: # %while.body188.i + # in Loop: Header=BB0_94 Depth=4 + blt t4, t3, .LBB0_94 +.LBB0_97: # %while.end.i.loopexit + # in Loop: Header=BB0_81 Depth=3 addw s6, t1, s4 -.LBB0_99: # %while.end.i - # in Loop: Header=BB0_82 Depth=3 +.LBB0_98: # %while.end.i + # in Loop: Header=BB0_81 Depth=3 slli t2, s1, 2 ld t3, 464(sp) # 8-byte Folded Reload add t2, t3, t2 or t3, s4, s6 sw s4, 0(t2) - beqz t3, .LBB0_116 -# %bb.100: # %if.end201.i - # in Loop: Header=BB0_82 Depth=3 - beqz s4, .LBB0_117 -# %bb.101: # %if.end207.i - # in Loop: Header=BB0_82 Depth=3 - bnez s6, .LBB0_81 - j .LBB0_118 -.LBB0_102: # %for.end216.i - # in Loop: Header=BB0_66 Depth=2 + beqz t3, .LBB0_115 +# %bb.99: # %if.end201.i + # in Loop: Header=BB0_81 Depth=3 + beqz s4, .LBB0_116 +# %bb.100: # %if.end207.i + # in Loop: Header=BB0_81 Depth=3 + bnez s6, .LBB0_80 + j .LBB0_117 +.LBB0_101: # %for.end216.i + # in Loop: Header=BB0_65 Depth=2 ld s1, 344(sp) # 8-byte Folded Reload lw a2, 0(s1) slli a3, s5, 2 @@ -973,49 +970,49 @@ mv a4, t0 ld t3, 56(sp) # 8-byte Folded Reload li s6, 2 - j .LBB0_104 -.LBB0_103: # %for.inc243.i - # in Loop: Header=BB0_104 Depth=3 + j .LBB0_103 +.LBB0_102: # %for.inc243.i + # in Loop: Header=BB0_103 Depth=3 addi a3, a3, 4 addi a4, a4, 1 - beq a3, a5, .LBB0_106 -.LBB0_104: # %for.body228.i + beq a3, a5, .LBB0_105 +.LBB0_103: # %for.body228.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_66 Depth=2 + # Parent Loop BB0_65 Depth=2 # => This Inner Loop Header: Depth=3 lw t1, 0(a3) lw t2, 0(a0) - bge t1, t2, .LBB0_103 -# %bb.105: # %if.then235.i - # in Loop: Header=BB0_104 Depth=3 + bge t1, t2, .LBB0_102 +# %bb.104: # %if.then235.i + # in Loop: Header=BB0_103 Depth=3 sw t1, 0(a0) sw a4, 0(a2) - j .LBB0_103 -.LBB0_106: # %for.body249.i.preheader - # in Loop: Header=BB0_66 Depth=2 + j .LBB0_102 +.LBB0_105: # %for.body249.i.preheader + # in Loop: Header=BB0_65 Depth=2 li a2, 8 ld a3, 336(sp) # 8-byte Folded Reload mv a0, a3 - bltu a2, a3, .LBB0_108 -# %bb.107: # %for.body249.i.preheader - # in Loop: Header=BB0_66 Depth=2 + bltu a2, a3, .LBB0_107 +# %bb.106: # %for.body249.i.preheader + # in Loop: Header=BB0_65 Depth=2 li a0, 8 -.LBB0_108: # %for.body249.i.preheader - # in Loop: Header=BB0_66 Depth=2 +.LBB0_107: # %for.body249.i.preheader + # in Loop: Header=BB0_65 Depth=2 ld t1, 328(sp) # 8-byte Folded Reload ld t2, 320(sp) # 8-byte Folded Reload ld a2, 352(sp) # 8-byte Folded Reload - bgeu a2, a0, .LBB0_110 -# %bb.109: # in Loop: Header=BB0_66 Depth=2 + bgeu a2, a0, .LBB0_109 +# %bb.108: # in Loop: Header=BB0_65 Depth=2 ld a3, 360(sp) # 8-byte Folded Reload - j .LBB0_113 -.LBB0_110: # %vector.memcheck1088 - # in Loop: Header=BB0_66 Depth=2 + j .LBB0_112 +.LBB0_109: # %vector.memcheck1088 + # in Loop: Header=BB0_65 Depth=2 ld a0, 304(sp) # 8-byte Folded Reload ld a3, 360(sp) # 8-byte Folded Reload - bltu a0, t3, .LBB0_113 -# %bb.111: # %vector.ph1095 - # in Loop: Header=BB0_66 Depth=2 + bltu a0, t3, .LBB0_112 +# %bb.110: # %vector.ph1095 + # in Loop: Header=BB0_65 Depth=2 ld a0, 312(sp) # 8-byte Folded Reload srli a0, a0, 1 neg a0, a0 @@ -1023,18 +1020,18 @@ ld a2, 384(sp) # 8-byte Folded Reload add a3, a0, a2 ld a2, 224(sp) # 8-byte Folded Reload -.LBB0_112: # %vector.body1101 +.LBB0_111: # %vector.body1101 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_66 Depth=2 + # Parent Loop BB0_65 Depth=2 # => This Inner Loop Header: Depth=3 vl2re32.v v8, (t6) vs2r.v v8, (ra) sub a0, a0, a2 add t6, t6, t3 add ra, ra, t3 - bnez a0, .LBB0_112 -.LBB0_113: # %for.body249.i.preheader1142 - # in Loop: Header=BB0_66 Depth=2 + bnez a0, .LBB0_111 +.LBB0_112: # %for.body249.i.preheader1142 + # in Loop: Header=BB0_65 Depth=2 subw a0, a6, a3 slli a3, a3, 2 ld a2, 456(sp) # 8-byte Folded Reload @@ -1045,26 +1042,26 @@ ld t3, 376(sp) # 8-byte Folded Reload ld t6, 384(sp) # 8-byte Folded Reload ld s4, 368(sp) # 8-byte Folded Reload -.LBB0_114: # %for.body249.i +.LBB0_113: # %for.body249.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_66 Depth=2 + # Parent Loop BB0_65 Depth=2 # => This Inner Loop Header: Depth=3 lw a4, 0(a3) sw a4, 0(a2) addiw a0, a0, -1 addi a2, a2, 4 addi a3, a3, 4 - bnez a0, .LBB0_114 -.LBB0_115: # %while.cond.loopexit.i - # in Loop: Header=BB0_66 Depth=2 + bnez a0, .LBB0_113 +.LBB0_114: # %while.cond.loopexit.i + # in Loop: Header=BB0_65 Depth=2 addi s5, s5, 1 addi a6, a6, 1 addi a1, a1, 1 addi a7, a7, -1 addiw t0, t0, -1 - bne a1, s9, .LBB0_66 - j .LBB0_120 -.LBB0_116: # %if.then198.i + bne a1, s9, .LBB0_65 + j .LBB0_119 +.LBB0_115: # %if.then198.i # in Loop: Header=BB0_6 Depth=1 ld a0, 456(sp) # 8-byte Folded Reload call free @@ -1077,15 +1074,15 @@ mv s2, s5 ld s11, 248(sp) # 8-byte Folded Reload ld s10, 264(sp) # 8-byte Folded Reload - ld s9, 168(sp) # 8-byte Folded Reload + ld s9, 200(sp) # 8-byte Folded Reload li s6, 2 li t2, -1 ld s8, 392(sp) # 8-byte Folded Reload ld s5, 440(sp) # 8-byte Folded Reload ld a5, 408(sp) # 8-byte Folded Reload ld a6, 400(sp) # 8-byte Folded Reload - j .LBB0_125 -.LBB0_117: # %if.then204.i + j .LBB0_124 +.LBB0_116: # %if.then204.i # in Loop: Header=BB0_6 Depth=1 ld a0, 456(sp) # 8-byte Folded Reload call free @@ -1100,14 +1097,14 @@ mv s2, s5 ld s11, 248(sp) # 8-byte Folded Reload ld s10, 264(sp) # 8-byte Folded Reload - ld s9, 168(sp) # 8-byte Folded Reload + ld s9, 200(sp) # 8-byte Folded Reload li s6, 2 li t2, -1 ld s8, 392(sp) # 8-byte Folded Reload ld s5, 440(sp) # 8-byte Folded Reload ld a5, 408(sp) # 8-byte Folded Reload - j .LBB0_125 -.LBB0_118: # %if.then210.i + j .LBB0_124 +.LBB0_117: # %if.then210.i # in Loop: Header=BB0_6 Depth=1 ld a0, 456(sp) # 8-byte Folded Reload call free @@ -1122,17 +1119,17 @@ mv s2, s5 ld s11, 248(sp) # 8-byte Folded Reload ld s10, 264(sp) # 8-byte Folded Reload - ld s9, 168(sp) # 8-byte Folded Reload + ld s9, 200(sp) # 8-byte Folded Reload li s6, 2 li t2, -1 ld s8, 392(sp) # 8-byte Folded Reload ld s5, 440(sp) # 8-byte Folded Reload ld a6, 400(sp) # 8-byte Folded Reload - j .LBB0_125 -.LBB0_119: # %while.end259.loopexit.split.loop.exit319.i + j .LBB0_124 +.LBB0_118: # %while.end259.loopexit.split.loop.exit319.i # in Loop: Header=BB0_6 Depth=1 mv s2, s5 -.LBB0_120: # %while.end259.i +.LBB0_119: # %while.end259.i # in Loop: Header=BB0_6 Depth=1 sext.w a0, s2 slti a0, a0, 1 @@ -1141,11 +1138,11 @@ and a0, a0, a1 ld s11, 248(sp) # 8-byte Folded Reload ld s10, 264(sp) # 8-byte Folded Reload - ld s9, 168(sp) # 8-byte Folded Reload + ld s9, 200(sp) # 8-byte Folded Reload ld s8, 392(sp) # 8-byte Folded Reload ld s5, 440(sp) # 8-byte Folded Reload ld a4, 400(sp) # 8-byte Folded Reload -.LBB0_121: # %land.rhs263.i +.LBB0_120: # %land.rhs263.i # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 sext.w a1, s2 @@ -1154,14 +1151,14 @@ lw a3, -4(a2) lw a2, 0(a2) subw a3, a3, a2 - blt s6, a3, .LBB0_124 -# %bb.122: # %while.body273.i - # in Loop: Header=BB0_121 Depth=2 + blt s6, a3, .LBB0_123 +# %bb.121: # %while.body273.i + # in Loop: Header=BB0_120 Depth=2 addi s2, s2, -1 - blt a5, a1, .LBB0_121 -# %bb.123: # in Loop: Header=BB0_6 Depth=1 + blt a5, a1, .LBB0_120 +# %bb.122: # in Loop: Header=BB0_6 Depth=1 mv s2, a0 -.LBB0_124: # %while.end275.i +.LBB0_123: # %while.end275.i # in Loop: Header=BB0_6 Depth=1 sext.w a0, s2 slli a0, a0, 2 @@ -1186,7 +1183,7 @@ mv a6, s1 mv a5, s0 li t2, -1 -.LBB0_125: # %extend_bw.exit +.LBB0_124: # %extend_bw.exit # in Loop: Header=BB0_6 Depth=1 lw a0, 4(s3) lw a1, 56(s9) @@ -1196,14 +1193,14 @@ mul a0, a0, a1 mul a1, a2, s2 addw a0, a1, a0 - bltz a0, .LBB0_127 -# %bb.126: # %if.then147 + bltz a0, .LBB0_126 +# %bb.125: # %if.then147 # in Loop: Header=BB0_6 Depth=1 addi a0, a5, 1 sw a0, 4(s3) addi a0, a6, 1 sw a0, 0(s3) -.LBB0_127: # %if.end158 +.LBB0_126: # %if.end158 # in Loop: Header=BB0_6 Depth=1 lw a0, 40(s8) .Lpcrel_hi6: @@ -1211,8 +1208,8 @@ sd a1, 208(sp) # 8-byte Folded Spill li t5, 1 mv a3, s8 - bnez a0, .LBB0_238 -# %bb.128: # %land.lhs.true161 + bnez a0, .LBB0_237 +# %bb.127: # %land.lhs.true161 # in Loop: Header=BB0_6 Depth=1 lw s1, 16(a3) ld a0, 8(a3) @@ -1225,30 +1222,30 @@ ld a0, 160(sp) # 8-byte Folded Reload lw a0, 0(a0) sext.w a2, a1 - bgeu a2, a0, .LBB0_238 -# %bb.129: # %if.then170 + bgeu a2, a0, .LBB0_237 +# %bb.128: # %if.then170 # in Loop: Header=BB0_6 Depth=1 subw a0, a0, a1 li a2, 61 - bltu a0, a2, .LBB0_145 -# %bb.130: # %land.lhs.true183 + bltu a0, a2, .LBB0_144 +# %bb.129: # %land.lhs.true183 # in Loop: Header=BB0_6 Depth=1 ld s8, 392(sp) # 8-byte Folded Reload lw a2, 24(s8) lw a3, 28(s8) lw a4, 8(s3) addw a2, a3, a2 - ld s7, 64(sp) # 8-byte Folded Reload - bgeu a4, a2, .LBB0_146 -# %bb.131: # %if.then188 + ld s7, 216(sp) # 8-byte Folded Reload + bgeu a4, a2, .LBB0_145 +# %bb.130: # %if.then188 # in Loop: Header=BB0_6 Depth=1 lw a2, 36(s11) li a3, 10 - bltu a2, a3, .LBB0_133 -# %bb.132: # %if.then188 + bltu a2, a3, .LBB0_132 +# %bb.131: # %if.then188 # in Loop: Header=BB0_6 Depth=1 li a2, 10 -.LBB0_133: # %if.then188 +.LBB0_132: # %if.then188 # in Loop: Header=BB0_6 Depth=1 ld a3, 16(s10) add a1, a3, a1 @@ -1294,28 +1291,27 @@ lui s1, 1024 add s1, s0, s1 mv s2, s0 - ld s4, 216(sp) # 8-byte Folded Reload -.LBB0_134: # %for.body.i312 +.LBB0_133: # %for.body.i312 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld a0, 0(s2) - mv a1, s4 + mv a1, s7 call tdestroy addi s2, s2, 8 - bne s2, s1, .LBB0_134 -# %bb.135: # %free_hash_env.exit318 + bne s2, s1, .LBB0_133 +# %bb.134: # %free_hash_env.exit318 # in Loop: Header=BB0_6 Depth=1 mv a0, s0 call free lwu a1, 520(sp) - beqz a1, .LBB0_147 -# %bb.136: # %for.body.preheader.i321 + beqz a1, .LBB0_146 +# %bb.135: # %for.body.preheader.i321 # in Loop: Header=BB0_6 Depth=1 li a0, 0 slli a1, a1, 3 vsetivli zero, 4, e32, m1, ta, ma li a3, 32 -.LBB0_137: # %for.body.i323 +.LBB0_136: # %for.body.i323 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld a2, 512(sp) @@ -1329,8 +1325,8 @@ vsetivli zero, 4, e32, m1, ta, ma addi a0, a0, 8 vse32.v v8, (a2) - bne a1, a0, .LBB0_137 -# %bb.138: # %swap_seqs.exit332 + bne a1, a0, .LBB0_136 +# %bb.137: # %swap_seqs.exit332 # in Loop: Header=BB0_6 Depth=1 ld a0, 512(sp) ld a0, 0(a0) @@ -1338,41 +1334,41 @@ lwu a7, 0(a0) add a2, a1, a7 addi a3, a2, -2 - bltu a3, a1, .LBB0_144 -# %bb.139: # %swap_seqs.exit332 + bltu a3, a1, .LBB0_143 +# %bb.138: # %swap_seqs.exit332 # in Loop: Header=BB0_6 Depth=1 ld a3, 16(s10) lwu a6, 4(a0) add a4, a3, a6 addi a5, a4, -2 - bltu a5, a3, .LBB0_144 -# %bb.140: # %land.rhs.i334.preheader + bltu a5, a3, .LBB0_143 +# %bb.139: # %land.rhs.i334.preheader # in Loop: Header=BB0_6 Depth=1 li a5, 0 addi a6, a6, -1 addi a7, a7, -1 -.LBB0_141: # %land.rhs.i334 +.LBB0_140: # %land.rhs.i334 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 add t0, a4, a5 add t1, a2, a5 lbu t2, -2(t1) lbu t3, -2(t0) - bne t2, t3, .LBB0_144 -# %bb.142: # %while.body.i337 - # in Loop: Header=BB0_141 Depth=2 + bne t2, t3, .LBB0_143 +# %bb.141: # %while.body.i337 + # in Loop: Header=BB0_140 Depth=2 addi t1, t1, -3 sw a7, 0(a0) sw a6, 4(a0) - bltu t1, a1, .LBB0_144 -# %bb.143: # %while.body.i337 - # in Loop: Header=BB0_141 Depth=2 + bltu t1, a1, .LBB0_143 +# %bb.142: # %while.body.i337 + # in Loop: Header=BB0_140 Depth=2 addi t0, t0, -3 addi a6, a6, -1 addi a7, a7, -1 addi a5, a5, -1 - bgeu t0, a3, .LBB0_141 -.LBB0_144: # %grow_exon_left.exit + bgeu t0, a3, .LBB0_140 +.LBB0_143: # %grow_exon_left.exit # in Loop: Header=BB0_6 Depth=1 lw a2, 16(s8) lw a3, 36(s11) @@ -1389,25 +1385,24 @@ ld s3, 0(a0) ld a0, 160(sp) # 8-byte Folded Reload li t2, -1 - bnez s1, .LBB0_148 - j .LBB0_159 -.LBB0_145: # in Loop: Header=BB0_6 Depth=1 - ld s7, 64(sp) # 8-byte Folded Reload + bnez s1, .LBB0_147 + j .LBB0_158 +.LBB0_144: # in Loop: Header=BB0_6 Depth=1 ld a0, 160(sp) # 8-byte Folded Reload ld s8, 392(sp) # 8-byte Folded Reload - bnez s1, .LBB0_148 - j .LBB0_159 -.LBB0_146: # in Loop: Header=BB0_6 Depth=1 + bnez s1, .LBB0_147 + j .LBB0_158 +.LBB0_145: # in Loop: Header=BB0_6 Depth=1 ld a0, 160(sp) # 8-byte Folded Reload - bnez s1, .LBB0_148 - j .LBB0_159 -.LBB0_147: # %free_hash_env.exit318.if.end234_crit_edge + bnez s1, .LBB0_147 + j .LBB0_158 +.LBB0_146: # %free_hash_env.exit318.if.end234_crit_edge # in Loop: Header=BB0_6 Depth=1 lw s1, 16(s8) ld a0, 160(sp) # 8-byte Folded Reload li t2, -1 - beqz s1, .LBB0_159 -.LBB0_148: # %land.rhs240.lr.ph + beqz s1, .LBB0_158 +.LBB0_147: # %land.rhs240.lr.ph # in Loop: Header=BB0_6 Depth=1 li s2, 0 ld s4, 0(s5) @@ -1415,7 +1410,7 @@ slli a0, s1, 32 srli s5, a0, 32 addi s6, s1, -1 -.LBB0_149: # %land.rhs240 +.LBB0_148: # %land.rhs240 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 slli a0, s6, 32 @@ -1426,32 +1421,32 @@ lw a0, 4(a0) mv a2, s0 call is_polyAT_exon_p - beqz a0, .LBB0_152 -# %bb.150: # %while.body251 - # in Loop: Header=BB0_149 Depth=2 + beqz a0, .LBB0_151 +# %bb.149: # %while.body251 + # in Loop: Header=BB0_148 Depth=2 addiw s2, s2, 1 addi s5, s5, -1 addi s6, s6, -1 - bnez s5, .LBB0_149 -# %bb.151: # in Loop: Header=BB0_6 Depth=1 + bnez s5, .LBB0_148 +# %bb.150: # in Loop: Header=BB0_6 Depth=1 mv s2, s1 -.LBB0_152: # %while.end253 +.LBB0_151: # %while.end253 # in Loop: Header=BB0_6 Depth=1 ld a0, 160(sp) # 8-byte Folded Reload li s6, 2 li t2, -1 ld s5, 440(sp) # 8-byte Folded Reload - beqz s2, .LBB0_159 -# %bb.153: # %if.then256 + beqz s2, .LBB0_158 +# %bb.152: # %if.then256 # in Loop: Header=BB0_6 Depth=1 subw a0, s1, s2 - bltu s1, s2, .LBB0_157 -# %bb.154: # %for.body264.preheader + bltu s1, s2, .LBB0_156 +# %bb.153: # %for.body264.preheader # in Loop: Header=BB0_6 Depth=1 slli a0, a0, 32 srli s0, a0, 32 srli s3, a0, 29 -.LBB0_155: # %for.body264 +.LBB0_154: # %for.body264 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld a0, 8(s8) @@ -1461,16 +1456,16 @@ lwu s1, 16(s8) addi s0, s0, 1 addi s3, s3, 8 - bltu s0, s1, .LBB0_155 -# %bb.156: # %for.end270.loopexit + bltu s0, s1, .LBB0_154 +# %bb.155: # %for.end270.loopexit # in Loop: Header=BB0_6 Depth=1 subw a0, s1, s2 -.LBB0_157: # %for.end270 +.LBB0_156: # %for.end270 # in Loop: Header=BB0_6 Depth=1 sext.w s1, s1 sw a0, 16(s8) beq s1, s2, .LBB0_5 -# %bb.158: # %cleanup283 +# %bb.157: # %cleanup283 # in Loop: Header=BB0_6 Depth=1 ld a1, 0(s5) addi a0, a0, -1 @@ -1480,49 +1475,50 @@ ld s3, 0(a0) ld a0, 160(sp) # 8-byte Folded Reload li t2, -1 -.LBB0_159: # %if.end286 +.LBB0_158: # %if.end286 # in Loop: Header=BB0_6 Depth=1 mv a3, s8 lwu s8, 12(s3) lw a0, 0(a0) sext.w a1, s8 li t5, 1 - beq a0, a1, .LBB0_238 -# %bb.160: # %if.then292 + beq a0, a1, .LBB0_237 +# %bb.159: # %if.then292 # in Loop: Header=BB0_6 Depth=1 - subw s9, a0, s8 + subw s11, a0, s8 li a0, 250 - bltu s9, a0, .LBB0_162 -# %bb.161: # %if.then292 + bltu s11, a0, .LBB0_161 +# %bb.160: # %if.then292 # in Loop: Header=BB0_6 Depth=1 - li s9, 250 -.LBB0_162: # %if.then292 + li s11, 250 +.LBB0_161: # %if.then292 # in Loop: Header=BB0_6 Depth=1 lwu s0, 8(s3) - lw a0, 16(s11) - slli a6, s9, 2 + ld a0, 248(sp) # 8-byte Folded Reload + lw a0, 16(a0) + slli a6, s11, 2 subw a0, a0, s0 - blt a6, a0, .LBB0_164 -# %bb.163: # %if.then292 + blt a6, a0, .LBB0_163 +# %bb.162: # %if.then292 # in Loop: Header=BB0_6 Depth=1 mv a6, a0 -.LBB0_164: # %if.then292 +.LBB0_163: # %if.then292 # in Loop: Header=BB0_6 Depth=1 ld a0, 248(sp) # 8-byte Folded Reload ld s10, 8(a0) ld a0, 264(sp) # 8-byte Folded Reload ld s1, 16(a0) add s10, s10, s0 - blez a6, .LBB0_173 -# %bb.165: # %land.rhs.preheader.i + blez a6, .LBB0_172 +# %bb.164: # %land.rhs.preheader.i # in Loop: Header=BB0_6 Depth=1 addiw a0, a6, -1 - addiw a1, s9, -1 - bltu a0, a1, .LBB0_167 -# %bb.166: # %land.rhs.preheader.i + addiw a1, s11, -1 + bltu a0, a1, .LBB0_166 +# %bb.165: # %land.rhs.preheader.i # in Loop: Header=BB0_6 Depth=1 mv a0, a1 -.LBB0_167: # %land.rhs.preheader.i +.LBB0_166: # %land.rhs.preheader.i # in Loop: Header=BB0_6 Depth=1 li s4, 0 add a1, s1, s8 @@ -1531,54 +1527,52 @@ srli a2, a2, 32 neg a2, a2 mv a3, s10 -.LBB0_168: # %land.rhs.i382 +.LBB0_167: # %land.rhs.i382 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 lbu a4, 0(a1) lbu a5, 0(a3) - bne a4, a5, .LBB0_171 -# %bb.169: # %for.inc.i385 - # in Loop: Header=BB0_168 Depth=2 + bne a4, a5, .LBB0_170 +# %bb.168: # %for.inc.i385 + # in Loop: Header=BB0_167 Depth=2 addiw s4, s4, 1 addi a2, a2, 1 addi a3, a3, 1 addi a1, a1, 1 - bnez a2, .LBB0_168 -# %bb.170: # in Loop: Header=BB0_6 Depth=1 + bnez a2, .LBB0_167 +# %bb.169: # in Loop: Header=BB0_6 Depth=1 mv s4, a0 -.LBB0_171: # %for.end.i341 +.LBB0_170: # %for.end.i341 # in Loop: Header=BB0_6 Depth=1 - bne s4, s9, .LBB0_174 -.LBB0_172: # %if.then.i380 + bne s4, s11, .LBB0_173 +.LBB0_171: # %if.then.i380 # in Loop: Header=BB0_6 Depth=1 li s6, 0 - add s0, s0, s9 - ld s11, 248(sp) # 8-byte Folded Reload + add s0, s0, s11 ld s10, 264(sp) # 8-byte Folded Reload - j .LBB0_236 -.LBB0_173: # in Loop: Header=BB0_6 Depth=1 + j .LBB0_235 +.LBB0_172: # in Loop: Header=BB0_6 Depth=1 li s4, 0 - beq zero, s9, .LBB0_172 -.LBB0_174: # %if.end.i344 + beq zero, s11, .LBB0_171 +.LBB0_173: # %if.end.i344 # in Loop: Header=BB0_6 Depth=1 sext.w s5, a6 - bne s4, s5, .LBB0_176 -# %bb.175: # %if.then14.i + bne s4, s5, .LBB0_175 +# %bb.174: # %if.then14.i # in Loop: Header=BB0_6 Depth=1 li s6, 0 add s0, a6, s0 - mv s9, a6 - ld s11, 248(sp) # 8-byte Folded Reload + mv s11, a6 ld s10, 264(sp) # 8-byte Folded Reload - j .LBB0_235 -.LBB0_176: # %if.end17.i + j .LBB0_234 +.LBB0_175: # %if.end17.i # in Loop: Header=BB0_6 Depth=1 - sd s8, 360(sp) # 8-byte Folded Spill - sd s0, 368(sp) # 8-byte Folded Spill + sd s0, 360(sp) # 8-byte Folded Spill ld a0, 248(sp) # 8-byte Folded Reload - lw s11, 36(a0) + lw a0, 36(a0) + sd a0, 456(sp) # 8-byte Folded Spill sd a6, 352(sp) # 8-byte Folded Spill - addw s6, a6, s9 + addw s6, a6, s11 addiw s2, s6, 1 slli s2, s2, 2 mv a0, s2 @@ -1587,25 +1581,24 @@ mv a0, s2 call xmalloc sd a0, 464(sp) # 8-byte Folded Spill - bltz s6, .LBB0_178 -# %bb.177: # %for.body30.preheader.i + bltz s6, .LBB0_177 +# %bb.176: # %for.body30.preheader.i # in Loop: Header=BB0_6 Depth=1 li a1, 255 mv a0, s7 mv a2, s2 call memset -.LBB0_178: # %land.rhs63.lr.ph.i +.LBB0_177: # %land.rhs63.lr.ph.i # in Loop: Header=BB0_6 Depth=1 - addiw s8, s9, 1 - li a0, 1 - slli a0, a0, 32 - addi a0, a0, -4 - sd a0, 432(sp) # 8-byte Folded Spill - slli s0, s9, 2 - add s6, s7, s0 - sd s6, 384(sp) # 8-byte Folded Spill + addiw s0, s11, 1 + li s9, 1 + slli s9, s9, 32 + addi s9, s9, -4 + sd s9, 424(sp) # 8-byte Folded Spill + slli s9, s11, 2 + add s6, s7, s9 sw s4, 0(s6) - slli s2, s8, 2 + slli s2, s0, 2 mv a0, s2 call xmalloc mv s4, a0 @@ -1614,135 +1607,135 @@ mv s2, a0 addi a0, s4, 4 li a1, 255 - mv a2, s0 + mv a2, s9 call memset - li t3, 1 mv t0, s4 + sd s6, 384(sp) # 8-byte Folded Spill lw a1, 0(s6) li a0, 0 sw a1, 0(s4) - sw s9, 0(s2) - srliw a1, s11, 31 - add a1, s11, a1 - sraiw a6, a1, 1 - slliw t4, s11, 1 - addi a3, s7, -4 - addi a1, s7, 4 - sd a1, 400(sp) # 8-byte Folded Spill - addiw s6, s9, 2 + sw s11, 0(s2) + ld a2, 456(sp) # 8-byte Folded Reload + srliw a1, a2, 31 + add a1, a2, a1 + sraiw t4, a1, 1 + slliw a1, a2, 1 + sd a1, 432(sp) # 8-byte Folded Spill + li t3, 1 + addi a1, s7, -4 + addi a2, s7, 4 + sd a2, 376(sp) # 8-byte Folded Spill + addiw s6, s11, 2 sd s7, 456(sp) # 8-byte Folded Spill - ld a4, 464(sp) # 8-byte Folded Reload - sub a1, s7, a4 - sd a1, 296(sp) # 8-byte Folded Spill - ld a1, 360(sp) # 8-byte Folded Reload - add s1, s1, a1 + ld t2, 464(sp) # 8-byte Folded Reload + sub a2, s7, t2 + sd a2, 304(sp) # 8-byte Folded Spill + add s1, s1, s8 sd s1, 448(sp) # 8-byte Folded Spill - addi a7, s9, -1 + addi a7, s11, -1 li a2, -1 li t1, 3 - add t2, a4, s0 + add t2, t2, s9 addi t2, t2, -4 - sd a3, 376(sp) # 8-byte Folded Spill - add s7, a3, s0 - ld s0, 432(sp) # 8-byte Folded Reload - addi s0, s0, 2 - mv s11, s9 + sd a1, 368(sp) # 8-byte Folded Spill + add s7, a1, s9 + ld s1, 424(sp) # 8-byte Folded Reload + addi s1, s1, 2 + mv a4, s11 mv t5, s6 - mv t6, s9 - sd s8, 408(sp) # 8-byte Folded Spill - mv ra, s8 - mv s8, a1 - sd s2, 424(sp) # 8-byte Folded Spill - sd s4, 416(sp) # 8-byte Folded Spill - sd a6, 336(sp) # 8-byte Folded Spill - sd t4, 328(sp) # 8-byte Folded Spill - sd s0, 320(sp) # 8-byte Folded Spill -.LBB0_179: # %land.rhs63.i + mv t6, s11 + sd s0, 400(sp) # 8-byte Folded Spill + mv ra, s0 + ld s9, 200(sp) # 8-byte Folded Reload + sd s2, 416(sp) # 8-byte Folded Spill + sd s4, 408(sp) # 8-byte Folded Spill + sd t4, 336(sp) # 8-byte Folded Spill + sd s1, 328(sp) # 8-byte Folded Spill +.LBB0_178: # %land.rhs63.i # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_194 Depth 3 - # Child Loop BB0_210 Depth 4 - # Child Loop BB0_221 Depth 3 - # Child Loop BB0_228 Depth 3 - # Child Loop BB0_230 Depth 3 - slli a1, a0, 2 - add a1, t0, a1 - lw a1, 0(a1) - li a3, 2 - bge a6, a1, .LBB0_182 -# %bb.180: # %if.else.i.i372 - # in Loop: Header=BB0_179 Depth=2 - bge a1, t4, .LBB0_187 -# %bb.181: # %if.then2.i.i377 - # in Loop: Header=BB0_179 Depth=2 - ld a1, 168(sp) # 8-byte Folded Reload - lw a3, 28(a1) -.LBB0_182: # %good_ratio.exit.i353 - # in Loop: Header=BB0_179 Depth=2 - bge a3, a0, .LBB0_190 -.LBB0_183: # %lor.rhs.i371 - # in Loop: Header=BB0_179 Depth=2 - bnez a0, .LBB0_184 - j .LBB0_672 -.LBB0_184: # %land.rhs73.i - # in Loop: Header=BB0_179 Depth=2 - addi a3, a0, -1 - slli a1, a3, 2 - add a1, t0, a1 - lw a1, 0(a1) + # Child Loop BB0_193 Depth 3 + # Child Loop BB0_209 Depth 4 + # Child Loop BB0_220 Depth 3 + # Child Loop BB0_227 Depth 3 + # Child Loop BB0_229 Depth 3 + slli a3, a0, 2 + add a3, t0, a3 + lw a3, 0(a3) li a5, 2 - bge a6, a1, .LBB0_189 -# %bb.185: # %if.else.i200.i - # in Loop: Header=BB0_179 Depth=2 - bge a1, t4, .LBB0_188 -# %bb.186: # %if.then2.i206.i - # in Loop: Header=BB0_179 Depth=2 - ld a1, 168(sp) # 8-byte Folded Reload - lw a5, 28(a1) - j .LBB0_189 -.LBB0_187: # %if.else3.i.i374 - # in Loop: Header=BB0_179 Depth=2 + bge t4, a3, .LBB0_181 +# %bb.179: # %if.else.i.i372 + # in Loop: Header=BB0_178 Depth=2 + ld a1, 432(sp) # 8-byte Folded Reload + bge a3, a1, .LBB0_186 +# %bb.180: # %if.then2.i.i377 + # in Loop: Header=BB0_178 Depth=2 + lw a5, 28(s9) +.LBB0_181: # %good_ratio.exit.i353 + # in Loop: Header=BB0_178 Depth=2 + bge a5, a0, .LBB0_189 +.LBB0_182: # %lor.rhs.i371 + # in Loop: Header=BB0_178 Depth=2 + bnez a0, .LBB0_183 + j .LBB0_671 +.LBB0_183: # %land.rhs73.i + # in Loop: Header=BB0_178 Depth=2 + addi a3, a0, -1 + slli a5, a3, 2 + add a5, t0, a5 + lw a5, 0(a5) + li a6, 2 + bge t4, a5, .LBB0_188 +# %bb.184: # %if.else.i200.i + # in Loop: Header=BB0_178 Depth=2 + ld a1, 432(sp) # 8-byte Folded Reload + bge a5, a1, .LBB0_187 +# %bb.185: # %if.then2.i206.i + # in Loop: Header=BB0_178 Depth=2 + lw a6, 28(s9) + j .LBB0_188 +.LBB0_186: # %if.else3.i.i374 + # in Loop: Header=BB0_178 Depth=2 .Lpcrel_hi7: - auipc a3, %pcrel_hi(.LCPI0_1) - fld fa5, %pcrel_lo(.Lpcrel_hi7)(a3) - ld a3, 208(sp) # 8-byte Folded Reload - fld fa4, %pcrel_lo(.Lpcrel_hi6)(a3) - fcvt.d.w fa3, a1 - fmadd.d fa5, fa3, fa5, fa4 - fcvt.w.d a3, fa5, rtz - blt a3, a0, .LBB0_183 - j .LBB0_190 -.LBB0_188: # %if.else3.i203.i - # in Loop: Header=BB0_179 Depth=2 -.Lpcrel_hi8: auipc a5, %pcrel_hi(.LCPI0_1) - fld fa5, %pcrel_lo(.Lpcrel_hi8)(a5) - ld a4, 208(sp) # 8-byte Folded Reload - fld fa4, %pcrel_lo(.Lpcrel_hi6)(a4) - ld a4, 464(sp) # 8-byte Folded Reload - fcvt.d.w fa3, a1 + fld fa5, %pcrel_lo(.Lpcrel_hi7)(a5) + ld a1, 208(sp) # 8-byte Folded Reload + fld fa4, %pcrel_lo(.Lpcrel_hi6)(a1) + fcvt.d.w fa3, a3 fmadd.d fa5, fa3, fa5, fa4 fcvt.w.d a5, fa5, rtz -.LBB0_189: # %good_ratio.exit207.i - # in Loop: Header=BB0_179 Depth=2 - bge a5, a3, .LBB0_190 - j .LBB0_672 -.LBB0_190: # %while.body.i355 - # in Loop: Header=BB0_179 Depth=2 + blt a5, a0, .LBB0_182 + j .LBB0_189 +.LBB0_187: # %if.else3.i203.i + # in Loop: Header=BB0_178 Depth=2 +.Lpcrel_hi8: + auipc a6, %pcrel_hi(.LCPI0_1) + fld fa5, %pcrel_lo(.Lpcrel_hi8)(a6) + ld a1, 208(sp) # 8-byte Folded Reload + fld fa4, %pcrel_lo(.Lpcrel_hi6)(a1) + fcvt.d.w fa3, a5 + fmadd.d fa5, fa3, fa5, fa4 + fcvt.w.d a6, fa5, rtz +.LBB0_188: # %good_ratio.exit207.i + # in Loop: Header=BB0_178 Depth=2 + bge a6, a3, .LBB0_189 + j .LBB0_671 +.LBB0_189: # %while.body.i355 + # in Loop: Header=BB0_178 Depth=2 addiw t6, t6, -1 - addi s11, s11, -1 - bge ra, t6, .LBB0_193 -# %bb.191: # %for.end205.thread.i - # in Loop: Header=BB0_179 Depth=2 + addi a4, a4, -1 + bge ra, t6, .LBB0_192 +# %bb.190: # %for.end205.thread.i + # in Loop: Header=BB0_178 Depth=2 ld a1, 384(sp) # 8-byte Folded Reload - lw a1, 0(a1) - slli a3, t3, 2 - add a5, t0, a3 - sw a1, 0(a5) - add a3, s2, a3 - sw s9, 0(a3) -.LBB0_192: # %for.end243.i - # in Loop: Header=BB0_179 Depth=2 + lw a3, 0(a1) + slli a5, t3, 2 + add a6, t0, a5 + sw a3, 0(a6) + add a5, s2, a5 + sw s11, 0(a5) +.LBB0_191: # %for.end243.i + # in Loop: Header=BB0_178 Depth=2 addiw ra, ra, 1 addi t3, t3, 1 addiw t5, t5, 1 @@ -1752,361 +1745,357 @@ addi t1, t1, 2 addi t2, t2, -4 addi s7, s7, -4 - ld a4, 464(sp) # 8-byte Folded Reload - ld a1, 408(sp) # 8-byte Folded Reload - bne a0, a1, .LBB0_179 - j .LBB0_673 -.LBB0_193: # %for.body86.lr.ph.i - # in Loop: Header=BB0_179 Depth=2 - slli a1, a0, 1 - addi a1, a1, 2 - and a1, a1, s0 - sd a1, 304(sp) # 8-byte Folded Spill - addi a1, a1, 1 - sd a1, 344(sp) # 8-byte Folded Spill - mv t4, s11 + ld a1, 400(sp) # 8-byte Folded Reload + bne a0, a1, .LBB0_178 + j .LBB0_672 +.LBB0_192: # %for.body86.lr.ph.i + # in Loop: Header=BB0_178 Depth=2 + slli a3, a0, 1 + addi a3, a3, 2 + and a3, a3, s1 + sd a3, 320(sp) # 8-byte Folded Spill + addi a3, a3, 1 + sd a3, 344(sp) # 8-byte Folded Spill not s1, a0 - add s1, s1, s9 - sd t3, 432(sp) # 8-byte Folded Spill - add a6, t3, s9 - slli a3, a6, 2 - ld a1, 376(sp) # 8-byte Folded Reload + add s1, s1, s11 + sd t3, 424(sp) # 8-byte Folded Spill + add s0, t3, s11 + slli a3, s0, 2 + ld a1, 368(sp) # 8-byte Folded Reload add a3, a1, a3 slli t3, s1, 2 - ld a1, 400(sp) # 8-byte Folded Reload + ld a1, 376(sp) # 8-byte Folded Reload add t3, a1, t3 mv t0, a2 -.LBB0_194: # %for.body86.i + mv t4, a4 +.LBB0_193: # %for.body86.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_179 Depth=2 + # Parent Loop BB0_178 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_210 Depth 4 + # Child Loop BB0_209 Depth 4 slli s2, t4, 2 - bne t4, s1, .LBB0_196 -# %bb.195: # %if.then91.i - # in Loop: Header=BB0_194 Depth=3 - lw s4, 0(t3) - addiw s4, s4, 1 - j .LBB0_206 -.LBB0_196: # %if.else.i357 - # in Loop: Header=BB0_194 Depth=3 - bne t4, a6, .LBB0_198 -# %bb.197: # %if.then99.i370 - # in Loop: Header=BB0_194 Depth=3 - lw s4, 0(a3) - j .LBB0_206 -.LBB0_198: # %if.else103.i - # in Loop: Header=BB0_194 Depth=3 - ld a1, 456(sp) # 8-byte Folded Reload - add a1, a1, s2 - lw s0, 0(a1) - lw a5, 4(a1) - bge s0, a5, .LBB0_200 -# %bb.199: # %if.else103.if.else124_crit_edge.i - # in Loop: Header=BB0_194 Depth=3 - lw a1, -4(a1) - j .LBB0_201 -.LBB0_200: # %land.lhs.true111.i - # in Loop: Header=BB0_194 Depth=3 - lw a1, -4(a1) - addiw s4, s0, 1 - bge s4, a1, .LBB0_206 -.LBB0_201: # %if.else124.i - # in Loop: Header=BB0_194 Depth=3 - addiw s4, a5, 1 - blt a1, s4, .LBB0_203 -# %bb.202: # %if.else124.i - # in Loop: Header=BB0_194 Depth=3 - mv s4, a1 -.LBB0_203: # %if.else124.i - # in Loop: Header=BB0_194 Depth=3 - blt a5, s0, .LBB0_205 -# %bb.204: # %if.else124.i - # in Loop: Header=BB0_194 Depth=3 - mv a1, s4 -.LBB0_205: # %if.else124.i - # in Loop: Header=BB0_194 Depth=3 - mv s4, a1 -.LBB0_206: # %if.end154.i - # in Loop: Header=BB0_194 Depth=3 - subw a1, t4, s9 - addw s0, s4, a1 - bltz s4, .LBB0_214 + bne t4, s1, .LBB0_195 +# %bb.194: # %if.then91.i + # in Loop: Header=BB0_193 Depth=3 + lw s9, 0(t3) + addiw s9, s9, 1 + j .LBB0_205 +.LBB0_195: # %if.else.i357 + # in Loop: Header=BB0_193 Depth=3 + bne t4, s0, .LBB0_197 +# %bb.196: # %if.then99.i370 + # in Loop: Header=BB0_193 Depth=3 + lw s9, 0(a3) + j .LBB0_205 +.LBB0_197: # %if.else103.i + # in Loop: Header=BB0_193 Depth=3 + ld a5, 456(sp) # 8-byte Folded Reload + add a5, a5, s2 + lw s4, 0(a5) + lw a6, 4(a5) + bge s4, a6, .LBB0_199 +# %bb.198: # %if.else103.if.else124_crit_edge.i + # in Loop: Header=BB0_193 Depth=3 + lw a5, -4(a5) + j .LBB0_200 +.LBB0_199: # %land.lhs.true111.i + # in Loop: Header=BB0_193 Depth=3 + lw a5, -4(a5) + addiw s9, s4, 1 + bge s9, a5, .LBB0_205 +.LBB0_200: # %if.else124.i + # in Loop: Header=BB0_193 Depth=3 + addiw s9, a6, 1 + blt a5, s9, .LBB0_202 +# %bb.201: # %if.else124.i + # in Loop: Header=BB0_193 Depth=3 + mv s9, a5 +.LBB0_202: # %if.else124.i + # in Loop: Header=BB0_193 Depth=3 + blt a6, s4, .LBB0_204 +# %bb.203: # %if.else124.i + # in Loop: Header=BB0_193 Depth=3 + mv a5, s9 +.LBB0_204: # %if.else124.i + # in Loop: Header=BB0_193 Depth=3 + mv s9, a5 +.LBB0_205: # %if.end154.i + # in Loop: Header=BB0_193 Depth=3 + subw a5, t4, s11 + addw s4, s9, a5 + bltz s9, .LBB0_213 +# %bb.206: # %while.cond160.preheader.i + # in Loop: Header=BB0_193 Depth=3 + bge s9, s11, .LBB0_213 # %bb.207: # %while.cond160.preheader.i - # in Loop: Header=BB0_194 Depth=3 - bge s4, s9, .LBB0_214 -# %bb.208: # %while.cond160.preheader.i - # in Loop: Header=BB0_194 Depth=3 - bge s0, s5, .LBB0_214 -# %bb.209: # %land.rhs166.preheader.i - # in Loop: Header=BB0_194 Depth=3 - addw a4, s4, t0 - slli a1, s4, 32 - srli a1, a1, 32 - ld a5, 448(sp) # 8-byte Folded Reload - add a5, a5, a1 -.LBB0_210: # %land.rhs166.i + # in Loop: Header=BB0_193 Depth=3 + bge s4, s5, .LBB0_213 +# %bb.208: # %land.rhs166.preheader.i + # in Loop: Header=BB0_193 Depth=3 + addw a1, s9, t0 + slli a5, s9, 32 + srli a5, a5, 32 + ld a6, 448(sp) # 8-byte Folded Reload + add a6, a6, a5 +.LBB0_209: # %land.rhs166.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_179 Depth=2 - # Parent Loop BB0_194 Depth=3 + # Parent Loop BB0_178 Depth=2 + # Parent Loop BB0_193 Depth=3 # => This Inner Loop Header: Depth=4 - lbu a1, 0(a5) - add s0, s10, a4 - lbu s0, 0(s0) - bne a1, s0, .LBB0_213 + lbu a5, 0(a6) + add s4, s10, a1 + lbu s4, 0(s4) + bne a5, s4, .LBB0_212 +# %bb.210: # %while.body176.i + # in Loop: Header=BB0_209 Depth=4 + addiw s9, s9, 1 + bge s9, s11, .LBB0_212 # %bb.211: # %while.body176.i - # in Loop: Header=BB0_210 Depth=4 - addiw s4, s4, 1 - bge s4, s9, .LBB0_213 -# %bb.212: # %while.body176.i - # in Loop: Header=BB0_210 Depth=4 - addi a4, a4, 1 - addi a5, a5, 1 - blt a4, s5, .LBB0_210 -.LBB0_213: # %if.end179.i.loopexit - # in Loop: Header=BB0_194 Depth=3 - addw s0, t0, s4 - ld a4, 464(sp) # 8-byte Folded Reload -.LBB0_214: # %if.end179.i - # in Loop: Header=BB0_194 Depth=3 - add s2, a4, s2 - sw s4, 0(s2) - bne s4, s9, .LBB0_216 -# %bb.215: # %if.end179.i - # in Loop: Header=BB0_194 Depth=3 - beq s0, s5, .LBB0_231 -.LBB0_216: # %if.end190.i - # in Loop: Header=BB0_194 Depth=3 + # in Loop: Header=BB0_209 Depth=4 + addi a1, a1, 1 + addi a6, a6, 1 + blt a1, s5, .LBB0_209 +.LBB0_212: # %if.end179.i.loopexit + # in Loop: Header=BB0_193 Depth=3 + addw s4, t0, s9 +.LBB0_213: # %if.end179.i + # in Loop: Header=BB0_193 Depth=3 + ld a1, 464(sp) # 8-byte Folded Reload + add s2, a1, s2 + sw s9, 0(s2) + bne s9, s11, .LBB0_215 +# %bb.214: # %if.end179.i + # in Loop: Header=BB0_193 Depth=3 + beq s4, s5, .LBB0_230 +.LBB0_215: # %if.end190.i + # in Loop: Header=BB0_193 Depth=3 ld s2, 224(sp) # 8-byte Folded Reload - beq s4, s9, .LBB0_232 -# %bb.217: # %if.end196.i - # in Loop: Header=BB0_194 Depth=3 - beq s0, s5, .LBB0_233 -# %bb.218: # %for.cond83.i - # in Loop: Header=BB0_194 Depth=3 - addiw a1, t4, 1 + beq s9, s11, .LBB0_231 +# %bb.216: # %if.end196.i + # in Loop: Header=BB0_193 Depth=3 + beq s4, s5, .LBB0_232 +# %bb.217: # %for.cond83.i + # in Loop: Header=BB0_193 Depth=3 + addiw a5, t4, 1 addi t4, t4, 1 addi t0, t0, 1 - bne t5, a1, .LBB0_194 -# %bb.219: # %for.end205.i - # in Loop: Header=BB0_179 Depth=2 + ld s9, 200(sp) # 8-byte Folded Reload + bne t5, a5, .LBB0_193 +# %bb.218: # %for.end205.i + # in Loop: Header=BB0_178 Depth=2 ld a1, 384(sp) # 8-byte Folded Reload lw a1, 0(a1) - ld a5, 432(sp) # 8-byte Folded Reload - slli a5, a5, 2 - ld a3, 416(sp) # 8-byte Folded Reload - add a3, a3, a5 + ld a6, 424(sp) # 8-byte Folded Reload + slli a6, a6, 2 + ld a3, 408(sp) # 8-byte Folded Reload + add a3, a3, a6 sw a1, 0(a3) - ld a1, 424(sp) # 8-byte Folded Reload - add a5, a1, a5 - sw s9, 0(a5) - mv a6, t2 - mv a1, t1 - mv t0, a7 - ld t4, 56(sp) # 8-byte Folded Reload - ld s0, 320(sp) # 8-byte Folded Reload - j .LBB0_221 -.LBB0_220: # %for.inc230.i - # in Loop: Header=BB0_221 Depth=3 - addi t0, t0, 1 - addiw a1, a1, -1 - addi a6, a6, 4 - beqz a1, .LBB0_223 -.LBB0_221: # %for.body215.i + ld a1, 416(sp) # 8-byte Folded Reload + add a6, a1, a6 + sw s11, 0(a6) + mv t0, t2 + mv a5, t1 + mv t3, a7 + ld s0, 312(sp) # 8-byte Folded Reload + ld s1, 328(sp) # 8-byte Folded Reload + j .LBB0_220 +.LBB0_219: # %for.inc230.i + # in Loop: Header=BB0_220 Depth=3 + addi t3, t3, 1 + addiw a5, a5, -1 + addi t0, t0, 4 + beqz a5, .LBB0_222 +.LBB0_220: # %for.body215.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_179 Depth=2 + # Parent Loop BB0_178 Depth=2 # => This Inner Loop Header: Depth=3 - lw t3, 0(a6) - lw a4, 0(a3) - bge a4, t3, .LBB0_220 -# %bb.222: # %if.then222.i - # in Loop: Header=BB0_221 Depth=3 - sw t3, 0(a3) - sw t0, 0(a5) - j .LBB0_220 -.LBB0_223: # %for.body236.i.preheader - # in Loop: Header=BB0_179 Depth=2 - ld a1, 312(sp) # 8-byte Folded Reload - srli a1, a1, 1 - li a3, 8 - bltu a3, a1, .LBB0_225 -# %bb.224: # %for.body236.i.preheader - # in Loop: Header=BB0_179 Depth=2 + lw t4, 0(t0) + lw a1, 0(a3) + bge a1, t4, .LBB0_219 +# %bb.221: # %if.then222.i + # in Loop: Header=BB0_220 Depth=3 + sw t4, 0(a3) + sw t3, 0(a6) + j .LBB0_219 +.LBB0_222: # %for.body236.i.preheader + # in Loop: Header=BB0_178 Depth=2 + srli a5, s0, 1 li a1, 8 -.LBB0_225: # %for.body236.i.preheader - # in Loop: Header=BB0_179 Depth=2 - mv a3, s11 - ld t0, 416(sp) # 8-byte Folded Reload - ld t3, 432(sp) # 8-byte Folded Reload - ld a4, 344(sp) # 8-byte Folded Reload - bltu a4, a1, .LBB0_229 -# %bb.226: # %vector.memcheck1070 - # in Loop: Header=BB0_179 Depth=2 - mv a3, s11 - ld a1, 296(sp) # 8-byte Folded Reload - bltu a1, t4, .LBB0_229 -# %bb.227: # %vector.ph1077 - # in Loop: Header=BB0_179 Depth=2 - ld a1, 312(sp) # 8-byte Folded Reload - srli a1, a1, 1 + bltu a1, a5, .LBB0_224 +# %bb.223: # %for.body236.i.preheader + # in Loop: Header=BB0_178 Depth=2 + li a5, 8 +.LBB0_224: # %for.body236.i.preheader + # in Loop: Header=BB0_178 Depth=2 + mv a3, a4 + ld t3, 56(sp) # 8-byte Folded Reload + ld t4, 336(sp) # 8-byte Folded Reload + ld a1, 344(sp) # 8-byte Folded Reload + bltu a1, a5, .LBB0_228 +# %bb.225: # %vector.memcheck1070 + # in Loop: Header=BB0_178 Depth=2 + mv a3, a4 + ld a1, 304(sp) # 8-byte Folded Reload + bltu a1, t3, .LBB0_228 +# %bb.226: # %vector.ph1077 + # in Loop: Header=BB0_178 Depth=2 + srli a1, s0, 1 neg a1, a1 - ld a3, 304(sp) # 8-byte Folded Reload - and a1, a3, a1 - add a3, s11, a1 - mv a5, s7 - mv a6, t2 -.LBB0_228: # %vector.body1083 + ld a5, 320(sp) # 8-byte Folded Reload + and a5, a5, a1 + add a3, a4, a5 + mv a6, s7 + mv t0, t2 +.LBB0_227: # %vector.body1083 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_179 Depth=2 + # Parent Loop BB0_178 Depth=2 # => This Inner Loop Header: Depth=3 - vl2re32.v v8, (a6) - vs2r.v v8, (a5) - sub a1, a1, s2 - add a6, a6, t4 - add a5, a5, t4 - bnez a1, .LBB0_228 -.LBB0_229: # %for.body236.i.preheader1141 - # in Loop: Header=BB0_179 Depth=2 - slli a4, a3, 2 - ld a1, 456(sp) # 8-byte Folded Reload - add a1, a1, a4 - ld a5, 464(sp) # 8-byte Folded Reload - add a5, a5, a4 + vl2re32.v v8, (t0) + vs2r.v v8, (a6) + sub a5, a5, s2 + add t0, t0, t3 + add a6, a6, t3 + bnez a5, .LBB0_227 +.LBB0_228: # %for.body236.i.preheader1141 + # in Loop: Header=BB0_178 Depth=2 + slli a1, a3, 2 + ld a5, 456(sp) # 8-byte Folded Reload + add a5, a5, a1 + ld a6, 464(sp) # 8-byte Folded Reload + add a6, a6, a1 subw a3, ra, a3 slli a3, a3, 32 srli a3, a3, 30 - ld a6, 400(sp) # 8-byte Folded Reload - add a4, a6, a4 - add a3, a4, a3 - ld s2, 424(sp) # 8-byte Folded Reload - ld a6, 336(sp) # 8-byte Folded Reload - ld t4, 328(sp) # 8-byte Folded Reload -.LBB0_230: # %for.body236.i + ld t0, 376(sp) # 8-byte Folded Reload + add a1, t0, a1 + add a3, a1, a3 + ld s2, 416(sp) # 8-byte Folded Reload + ld t0, 408(sp) # 8-byte Folded Reload + ld t3, 424(sp) # 8-byte Folded Reload +.LBB0_229: # %for.body236.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_179 Depth=2 + # Parent Loop BB0_178 Depth=2 # => This Inner Loop Header: Depth=3 - lw a4, 0(a5) - sw a4, 0(a1) - addi a1, a1, 4 + lw a1, 0(a6) + sw a1, 0(a5) addi a5, a5, 4 - bne a1, a3, .LBB0_230 - j .LBB0_192 -.LBB0_231: # %if.then187.i + addi a6, a6, 4 + bne a5, a3, .LBB0_229 + j .LBB0_191 +.LBB0_230: # %if.then187.i # in Loop: Header=BB0_6 Depth=1 ld a0, 456(sp) # 8-byte Folded Reload - mv s0, a4 call free - mv a0, s0 + ld a0, 464(sp) # 8-byte Folded Reload call free - ld a0, 416(sp) # 8-byte Folded Reload + ld a0, 408(sp) # 8-byte Folded Reload call free - ld a0, 424(sp) # 8-byte Folded Reload + ld a0, 416(sp) # 8-byte Folded Reload call free - ld s0, 368(sp) # 8-byte Folded Reload + ld s0, 360(sp) # 8-byte Folded Reload ld a0, 352(sp) # 8-byte Folded Reload add s0, a0, s0 - j .LBB0_234 -.LBB0_232: # %if.then193.i + j .LBB0_233 +.LBB0_231: # %if.then193.i # in Loop: Header=BB0_6 Depth=1 - mv a0, a4 + ld a0, 464(sp) # 8-byte Folded Reload call free ld a0, 456(sp) # 8-byte Folded Reload call free - ld a0, 416(sp) # 8-byte Folded Reload + ld a0, 408(sp) # 8-byte Folded Reload call free - ld a0, 424(sp) # 8-byte Folded Reload + ld a0, 416(sp) # 8-byte Folded Reload call free - ld a0, 368(sp) # 8-byte Folded Reload - add s0, s0, a0 - j .LBB0_234 -.LBB0_233: # %if.then199.i + ld s0, 360(sp) # 8-byte Folded Reload + add s0, s4, s0 + j .LBB0_233 +.LBB0_232: # %if.then199.i # in Loop: Header=BB0_6 Depth=1 - mv a0, a4 + ld a0, 464(sp) # 8-byte Folded Reload call free ld a0, 456(sp) # 8-byte Folded Reload call free - ld a0, 416(sp) # 8-byte Folded Reload + ld a0, 408(sp) # 8-byte Folded Reload call free - ld a0, 424(sp) # 8-byte Folded Reload + ld a0, 416(sp) # 8-byte Folded Reload call free - ld s0, 368(sp) # 8-byte Folded Reload + ld s0, 360(sp) # 8-byte Folded Reload ld a0, 352(sp) # 8-byte Folded Reload add s0, a0, s0 - mv s9, s4 -.LBB0_234: # %extend_fw.exit + mv s11, s9 +.LBB0_233: # %extend_fw.exit # in Loop: Header=BB0_6 Depth=1 - ld s6, 432(sp) # 8-byte Folded Reload - ld s11, 248(sp) # 8-byte Folded Reload + ld s6, 424(sp) # 8-byte Folded Reload ld s10, 264(sp) # 8-byte Folded Reload + ld s9, 200(sp) # 8-byte Folded Reload li t5, 1 li t2, -1 -.LBB0_235: # %extend_fw.exit +.LBB0_234: # %extend_fw.exit # in Loop: Header=BB0_6 Depth=1 ld s5, 440(sp) # 8-byte Folded Reload -.LBB0_236: # %extend_fw.exit +.LBB0_235: # %extend_fw.exit # in Loop: Header=BB0_6 Depth=1 lw a0, 12(s3) - ld a3, 168(sp) # 8-byte Folded Reload - lw a1, 56(a3) - lw a2, 48(a3) - add s8, s9, s8 - mv s9, a3 + lw a1, 56(s9) + lw a2, 48(s9) + add s8, s11, s8 subw a0, s8, a0 mul a0, a0, a1 mul a1, a2, s6 addw a0, a1, a0 + ld s11, 248(sp) # 8-byte Folded Reload li s6, 2 ld a3, 392(sp) # 8-byte Folded Reload - bltz a0, .LBB0_238 -# %bb.237: # %if.then342 + bltz a0, .LBB0_237 +# %bb.236: # %if.then342 # in Loop: Header=BB0_6 Depth=1 sw s8, 12(s3) sw s0, 8(s3) -.LBB0_238: # %if.end351 +.LBB0_237: # %if.end351 # in Loop: Header=BB0_6 Depth=1 lw a0, 16(a3) .Lpcrel_hi9: auipc a1, %pcrel_hi(.LCPI0_2) sd a1, 296(sp) # 8-byte Folded Spill mv s8, a3 - bgeu a0, s6, .LBB0_239 - j .LBB0_439 -.LBB0_239: # %for.body361.preheader + bgeu a0, s6, .LBB0_238 + j .LBB0_438 +.LBB0_238: # %for.body361.preheader # in Loop: Header=BB0_6 Depth=1 li a3, 1 - j .LBB0_242 -.LBB0_240: # in Loop: Header=BB0_242 Depth=2 + j .LBB0_241 +.LBB0_239: # in Loop: Header=BB0_241 Depth=2 ld s5, 440(sp) # 8-byte Folded Reload -.LBB0_241: # %cleanup497 - # in Loop: Header=BB0_242 Depth=2 +.LBB0_240: # %cleanup497 + # in Loop: Header=BB0_241 Depth=2 addiw a3, a3, 1 - bltu a3, a0, .LBB0_242 - j .LBB0_439 -.LBB0_242: # %for.body361 + bltu a3, a0, .LBB0_241 + j .LBB0_438 +.LBB0_241: # %for.body361 # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_265 Depth 3 - # Child Loop BB0_299 Depth 3 - # Child Loop BB0_302 Depth 3 - # Child Loop BB0_307 Depth 3 - # Child Loop BB0_437 Depth 3 - # Child Loop BB0_320 Depth 3 - # Child Loop BB0_329 Depth 3 - # Child Loop BB0_332 Depth 3 - # Child Loop BB0_336 Depth 3 - # Child Loop BB0_341 Depth 4 - # Child Loop BB0_353 Depth 5 - # Child Loop BB0_358 Depth 4 - # Child Loop BB0_363 Depth 4 - # Child Loop BB0_369 Depth 4 - # Child Loop BB0_385 Depth 5 - # Child Loop BB0_392 Depth 4 - # Child Loop BB0_397 Depth 4 - # Child Loop BB0_425 Depth 3 - # Child Loop BB0_433 Depth 3 - # Child Loop BB0_250 Depth 3 - # Child Loop BB0_255 Depth 3 - # Child Loop BB0_278 Depth 3 + # Child Loop BB0_264 Depth 3 + # Child Loop BB0_298 Depth 3 + # Child Loop BB0_301 Depth 3 + # Child Loop BB0_306 Depth 3 + # Child Loop BB0_436 Depth 3 + # Child Loop BB0_319 Depth 3 + # Child Loop BB0_328 Depth 3 + # Child Loop BB0_331 Depth 3 + # Child Loop BB0_335 Depth 3 + # Child Loop BB0_340 Depth 4 + # Child Loop BB0_352 Depth 5 + # Child Loop BB0_357 Depth 4 + # Child Loop BB0_362 Depth 4 + # Child Loop BB0_368 Depth 4 + # Child Loop BB0_384 Depth 5 + # Child Loop BB0_391 Depth 4 + # Child Loop BB0_396 Depth 4 + # Child Loop BB0_424 Depth 3 + # Child Loop BB0_432 Depth 3 + # Child Loop BB0_249 Depth 3 + # Child Loop BB0_254 Depth 3 + # Child Loop BB0_277 Depth 3 ld a1, 0(s5) addi s0, a3, -1 slli a2, s0, 32 @@ -2121,41 +2110,41 @@ lw a5, 4(a4) not a1, a6 addw s7, a5, a1 - blez s7, .LBB0_241 -# %bb.243: # %if.then376 - # in Loop: Header=BB0_242 Depth=2 + blez s7, .LBB0_240 +# %bb.242: # %if.then376 + # in Loop: Header=BB0_241 Depth=2 lw s5, 0(a4) lwu a7, 8(s2) addiw a2, s5, -1 sext.w a1, a7 - bgeu a1, a2, .LBB0_240 -# %bb.244: # %if.then382 - # in Loop: Header=BB0_242 Depth=2 + bgeu a1, a2, .LBB0_239 +# %bb.243: # %if.then382 + # in Loop: Header=BB0_241 Depth=2 sd a3, 288(sp) # 8-byte Folded Spill lw a3, 36(s11) ld t6, 8(s11) li a0, 500 - bltu a0, s7, .LBB0_247 -# %bb.245: # %if.then386 - # in Loop: Header=BB0_242 Depth=2 + bltu a0, s7, .LBB0_246 +# %bb.244: # %if.then386 + # in Loop: Header=BB0_241 Depth=2 not a0, a7 addw s1, s5, a0 ld a0, 144(sp) # 8-byte Folded Reload - bgeu a0, s1, .LBB0_260 -# %bb.246: # in Loop: Header=BB0_242 Depth=2 + bgeu a0, s1, .LBB0_259 +# %bb.245: # in Loop: Header=BB0_241 Depth=2 li s8, 0 ld s5, 440(sp) # 8-byte Folded Reload - j .LBB0_417 -.LBB0_247: # in Loop: Header=BB0_242 Depth=2 + j .LBB0_416 +.LBB0_246: # in Loop: Header=BB0_241 Depth=2 ld s5, 440(sp) # 8-byte Folded Reload mv s4, s0 li a0, 8 - bltu a3, a0, .LBB0_249 -.LBB0_248: # %if.end446 - # in Loop: Header=BB0_242 Depth=2 + bltu a3, a0, .LBB0_248 +.LBB0_247: # %if.end446 + # in Loop: Header=BB0_241 Depth=2 li a3, 8 -.LBB0_249: # %if.end446 - # in Loop: Header=BB0_242 Depth=2 +.LBB0_248: # %if.end446 + # in Loop: Header=BB0_241 Depth=2 lwu a0, 8(s2) lw a1, 0(a4) add a2, t6, a0 @@ -2201,23 +2190,23 @@ add s1, s0, s1 mv s2, s0 ld s3, 216(sp) # 8-byte Folded Reload -.LBB0_250: # %for.body.i485 +.LBB0_249: # %for.body.i485 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 ld a0, 0(s2) mv a1, s3 call tdestroy addi s2, s2, 8 - bne s2, s1, .LBB0_250 -# %bb.251: # %free_hash_env.exit491 - # in Loop: Header=BB0_242 Depth=2 + bne s2, s1, .LBB0_249 +# %bb.250: # %free_hash_env.exit491 + # in Loop: Header=BB0_241 Depth=2 mv a0, s0 call free lw a2, 520(sp) - beqz a2, .LBB0_259 -# %bb.252: # %if.then474 - # in Loop: Header=BB0_242 Depth=2 + beqz a2, .LBB0_258 +# %bb.251: # %if.then474 + # in Loop: Header=BB0_241 Depth=2 ld a6, 512(sp) ld a3, 0(a6) ld a0, 8(s11) @@ -2225,52 +2214,52 @@ ld a1, 16(s10) add a4, a0, t0 addi a5, a4, -2 - bltu a5, a0, .LBB0_273 -# %bb.253: # %if.then474 - # in Loop: Header=BB0_242 Depth=2 + bltu a5, a0, .LBB0_272 +# %bb.252: # %if.then474 + # in Loop: Header=BB0_241 Depth=2 lwu a7, 4(a3) add a5, a1, a7 addi t1, a5, -2 li t5, 1 mv s0, s4 - bltu t1, a1, .LBB0_274 -# %bb.254: # %land.rhs.i502.preheader - # in Loop: Header=BB0_242 Depth=2 + bltu t1, a1, .LBB0_273 +# %bb.253: # %land.rhs.i502.preheader + # in Loop: Header=BB0_241 Depth=2 li a6, 0 addi a7, a7, -1 addi t0, t0, -1 -.LBB0_255: # %land.rhs.i502 +.LBB0_254: # %land.rhs.i502 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 add t1, a5, a6 add t2, a4, a6 lbu t3, -2(t2) lbu t4, -2(t1) - bne t3, t4, .LBB0_258 -# %bb.256: # %while.body.i507 - # in Loop: Header=BB0_255 Depth=3 + bne t3, t4, .LBB0_257 +# %bb.255: # %while.body.i507 + # in Loop: Header=BB0_254 Depth=3 addi t2, t2, -3 sw t0, 0(a3) sw a7, 4(a3) - bltu t2, a0, .LBB0_258 -# %bb.257: # %while.body.i507 - # in Loop: Header=BB0_255 Depth=3 + bltu t2, a0, .LBB0_257 +# %bb.256: # %while.body.i507 + # in Loop: Header=BB0_254 Depth=3 addi t1, t1, -3 addi a7, a7, -1 addi t0, t0, -1 addi a6, a6, -1 - bgeu t1, a1, .LBB0_255 -.LBB0_258: # %grow_exon_left.exit515.loopexit - # in Loop: Header=BB0_242 Depth=2 + bgeu t1, a1, .LBB0_254 +.LBB0_257: # %grow_exon_left.exit515.loopexit + # in Loop: Header=BB0_241 Depth=2 ld a6, 512(sp) - j .LBB0_274 -.LBB0_259: # in Loop: Header=BB0_242 Depth=2 + j .LBB0_273 +.LBB0_258: # in Loop: Header=BB0_241 Depth=2 li t2, -1 ld a3, 288(sp) # 8-byte Folded Reload - j .LBB0_283 -.LBB0_260: # %if.end.i389 - # in Loop: Header=BB0_242 Depth=2 + j .LBB0_282 +.LBB0_259: # %if.end.i389 + # in Loop: Header=BB0_241 Depth=2 ld a0, 208(sp) # 8-byte Folded Reload fld fs0, %pcrel_lo(.Lpcrel_hi6)(a0) ld a0, 296(sp) # 8-byte Folded Reload @@ -2279,52 +2268,52 @@ fmadd.d fa4, fa5, fs1, fs0 fcvt.wu.d a0, fa4, rtz sd a0, 448(sp) # 8-byte Folded Spill - bltu a3, a0, .LBB0_262 -# %bb.261: # %if.end.i389 - # in Loop: Header=BB0_242 Depth=2 + bltu a3, a0, .LBB0_261 +# %bb.260: # %if.end.i389 + # in Loop: Header=BB0_241 Depth=2 sd a3, 448(sp) # 8-byte Folded Spill -.LBB0_262: # %if.end.i389 - # in Loop: Header=BB0_242 Depth=2 +.LBB0_261: # %if.end.i389 + # in Loop: Header=BB0_241 Depth=2 subw t1, s1, s7 - bltz t1, .LBB0_284 -# %bb.263: # %if.end45.i - # in Loop: Header=BB0_242 Depth=2 + bltz t1, .LBB0_283 +# %bb.262: # %if.end45.i + # in Loop: Header=BB0_241 Depth=2 sd a2, 48(sp) # 8-byte Folded Spill - sd s2, 88(sp) # 8-byte Folded Spill + sd s2, 80(sp) # 8-byte Folded Spill ld t0, 16(s10) add s11, t0, a6 add s8, t6, a7 addi s2, s11, -1 addi s3, s8, -1 mv s9, s7 - beqz s1, .LBB0_289 -# %bb.264: # %land.rhs.i412.preheader - # in Loop: Header=BB0_242 Depth=2 + beqz s1, .LBB0_288 +# %bb.263: # %land.rhs.i412.preheader + # in Loop: Header=BB0_241 Depth=2 mv a0, s1 mv s9, s7 -.LBB0_265: # %land.rhs.i412 +.LBB0_264: # %land.rhs.i412 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 mv a1, s9 add a2, s2, s9 lbu a2, 0(a2) add a3, s3, a0 lbu a3, 0(a3) - bne a2, a3, .LBB0_288 -# %bb.266: # %for.inc.i414 - # in Loop: Header=BB0_265 Depth=3 + bne a2, a3, .LBB0_287 +# %bb.265: # %for.inc.i414 + # in Loop: Header=BB0_264 Depth=3 addiw s9, a1, -1 - blt a0, s6, .LBB0_268 -# %bb.267: # %for.inc.i414 - # in Loop: Header=BB0_265 Depth=3 + blt a0, s6, .LBB0_267 +# %bb.266: # %for.inc.i414 + # in Loop: Header=BB0_264 Depth=3 addiw a0, a0, -1 - blt t5, a1, .LBB0_265 -.LBB0_268: # %for.end.i393 - # in Loop: Header=BB0_242 Depth=2 - bnez s9, .LBB0_289 -# %bb.269: # %if.then62.i - # in Loop: Header=BB0_242 Depth=2 + blt t5, a1, .LBB0_264 +.LBB0_267: # %for.end.i393 + # in Loop: Header=BB0_241 Depth=2 + bnez s9, .LBB0_288 +# %bb.268: # %if.then62.i + # in Loop: Header=BB0_241 Depth=2 mv s8, a4 mv s4, s0 add a0, a7, t1 @@ -2342,10 +2331,10 @@ lw a2, 524(sp) lw a1, 520(sp) ld a0, 512(sp) - ld s9, 168(sp) # 8-byte Folded Reload - bltu a1, a2, .LBB0_271 -# %bb.270: # %if.then.i397.i - # in Loop: Header=BB0_242 Depth=2 + ld s9, 200(sp) # 8-byte Folded Reload + bltu a1, a2, .LBB0_270 +# %bb.269: # %if.then.i397.i + # in Loop: Header=BB0_241 Depth=2 addi a2, a2, 5 sw a2, 524(sp) slli a1, a2, 32 @@ -2353,12 +2342,12 @@ call xrealloc lw a1, 520(sp) sd a0, 512(sp) -.LBB0_271: # %add_col_elt.exit406.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_270: # %add_col_elt.exit406.i + # in Loop: Header=BB0_241 Depth=2 ld s11, 248(sp) # 8-byte Folded Reload li t2, -1 ld s5, 440(sp) # 8-byte Folded Reload - ld s2, 88(sp) # 8-byte Folded Reload + ld s2, 80(sp) # 8-byte Folded Reload mv a4, s8 li s8, 0 slli a2, a1, 32 @@ -2370,16 +2359,16 @@ li t5, 1 mv s0, s4 lw a3, 36(s11) - bnez a0, .LBB0_418 -.LBB0_272: # %greedy.exit.if.end446_crit_edge - # in Loop: Header=BB0_242 Depth=2 + bnez a0, .LBB0_417 +.LBB0_271: # %greedy.exit.if.end446_crit_edge + # in Loop: Header=BB0_241 Depth=2 ld t6, 8(s11) - j .LBB0_421 -.LBB0_273: # in Loop: Header=BB0_242 Depth=2 + j .LBB0_420 +.LBB0_272: # in Loop: Header=BB0_241 Depth=2 li t5, 1 mv s0, s4 -.LBB0_274: # %grow_exon_left.exit515 - # in Loop: Header=BB0_242 Depth=2 +.LBB0_273: # %grow_exon_left.exit515 + # in Loop: Header=BB0_241 Depth=2 addi a2, a2, -1 slli a2, a2, 32 srli a2, a2, 29 @@ -2388,19 +2377,19 @@ lwu a5, 8(a2) lw a3, 16(s11) sext.w a4, a5 - bgeu a4, a3, .LBB0_281 -# %bb.275: # %land.lhs.true.lr.ph.i521 - # in Loop: Header=BB0_242 Depth=2 + bgeu a4, a3, .LBB0_280 +# %bb.274: # %land.lhs.true.lr.ph.i521 + # in Loop: Header=BB0_241 Depth=2 lwu a6, 12(a2) ld a4, 160(sp) # 8-byte Folded Reload lw a7, 0(a4) sext.w a4, a6 - bltu a7, a4, .LBB0_277 -# %bb.276: # %land.lhs.true.lr.ph.i521 - # in Loop: Header=BB0_242 Depth=2 + bltu a7, a4, .LBB0_276 +# %bb.275: # %land.lhs.true.lr.ph.i521 + # in Loop: Header=BB0_241 Depth=2 mv a4, a7 -.LBB0_277: # %land.lhs.true.lr.ph.i521 - # in Loop: Header=BB0_242 Depth=2 +.LBB0_276: # %land.lhs.true.lr.ph.i521 + # in Loop: Header=BB0_241 Depth=2 slli a4, a4, 32 srli a7, a4, 32 negw a3, a3 @@ -2409,18 +2398,18 @@ addi a5, a6, 1 add a1, a1, a6 sub a6, a7, a6 -.LBB0_278: # %land.lhs.true.i525 +.LBB0_277: # %land.lhs.true.i525 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 - beqz a6, .LBB0_281 -# %bb.279: # %land.rhs.i529 - # in Loop: Header=BB0_278 Depth=3 + beqz a6, .LBB0_280 +# %bb.278: # %land.rhs.i529 + # in Loop: Header=BB0_277 Depth=3 lbu a7, 0(a0) lbu t0, 0(a1) - bne a7, t0, .LBB0_281 -# %bb.280: # %while.body.i533 - # in Loop: Header=BB0_278 Depth=3 + bne a7, t0, .LBB0_280 +# %bb.279: # %while.body.i533 + # in Loop: Header=BB0_277 Depth=3 sw a4, 8(a2) sw a5, 12(a2) addi a4, a4, 1 @@ -2429,12 +2418,12 @@ addi a5, a5, 1 addi a1, a1, 1 addi a6, a6, -1 - bne a7, t5, .LBB0_278 -.LBB0_281: # %grow_exon_right.exit537 - # in Loop: Header=BB0_242 Depth=2 + bne a7, t5, .LBB0_277 +.LBB0_280: # %grow_exon_right.exit537 + # in Loop: Header=BB0_241 Depth=2 lw a3, 36(s11) -.LBB0_282: # %cleanup492.sink.split - # in Loop: Header=BB0_242 Depth=2 +.LBB0_281: # %cleanup492.sink.split + # in Loop: Header=BB0_241 Depth=2 addi a1, sp, 512 mv a0, s5 ld a2, 288(sp) # 8-byte Folded Reload @@ -2442,13 +2431,13 @@ sw zero, 520(sp) mv a3, s0 li t2, -1 -.LBB0_283: # %cleanup492 - # in Loop: Header=BB0_242 Depth=2 +.LBB0_282: # %cleanup492 + # in Loop: Header=BB0_241 Depth=2 lw a0, 16(s8) li t5, 1 - j .LBB0_241 -.LBB0_284: # %if.then8.i - # in Loop: Header=BB0_242 Depth=2 + j .LBB0_240 +.LBB0_283: # %if.then8.i + # in Loop: Header=BB0_241 Depth=2 .Lpcrel_hi10: auipc a0, %pcrel_hi(.LCPI0_3) fld fa3, %pcrel_lo(.Lpcrel_hi10)(a0) @@ -2457,27 +2446,27 @@ fmul.d fa3, fs2, fa3 flt.d a0, fa3, fa4 ld s5, 440(sp) # 8-byte Folded Reload - bnez a0, .LBB0_286 -# %bb.285: # %if.then8.i - # in Loop: Header=BB0_242 Depth=2 + bnez a0, .LBB0_285 +# %bb.284: # %if.then8.i + # in Loop: Header=BB0_241 Depth=2 fmv.d fa3, fa4 -.LBB0_286: # %if.then8.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_285: # %if.then8.i + # in Loop: Header=BB0_241 Depth=2 fle.d a0, fa5, fa3 - bnez a0, .LBB0_295 -# %bb.287: # %if.else.i423 - # in Loop: Header=BB0_242 Depth=2 + bnez a0, .LBB0_294 +# %bb.286: # %if.else.i423 + # in Loop: Header=BB0_241 Depth=2 ld s8, 448(sp) # 8-byte Folded Reload addi s8, s8, 1 - j .LBB0_417 -.LBB0_288: # in Loop: Header=BB0_242 Depth=2 + j .LBB0_416 +.LBB0_287: # in Loop: Header=BB0_241 Depth=2 mv s9, a1 -.LBB0_289: # %if.end70.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_288: # %if.end70.i + # in Loop: Header=BB0_241 Depth=2 sd t1, 456(sp) # 8-byte Folded Spill sd t0, 432(sp) # 8-byte Folded Spill sd t6, 424(sp) # 8-byte Folded Spill - sd a7, 80(sp) # 8-byte Folded Spill + sd a7, 72(sp) # 8-byte Folded Spill sd a6, 112(sp) # 8-byte Folded Spill sd a5, 120(sp) # 8-byte Folded Spill sd a4, 96(sp) # 8-byte Folded Spill @@ -2496,12 +2485,12 @@ mv t0, a0 ld a2, 224(sp) # 8-byte Folded Reload mv a0, a2 - bltu a1, a2, .LBB0_291 -# %bb.290: # %if.end70.i - # in Loop: Header=BB0_242 Depth=2 + bltu a1, a2, .LBB0_290 +# %bb.289: # %if.end70.i + # in Loop: Header=BB0_241 Depth=2 li a0, 8 -.LBB0_291: # %if.end70.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_290: # %if.end70.i + # in Loop: Header=BB0_241 Depth=2 ld a6, 120(sp) # 8-byte Folded Reload ld a7, 112(sp) # 8-byte Folded Reload subw s10, a6, a7 @@ -2510,21 +2499,21 @@ addi s4, a1, 1 slli s6, a1, 2 ld a4, 56(sp) # 8-byte Folded Reload - bgeu s4, a0, .LBB0_293 -# %bb.292: # in Loop: Header=BB0_242 Depth=2 + bgeu s4, a0, .LBB0_292 +# %bb.291: # in Loop: Header=BB0_241 Depth=2 li a0, 0 ld t1, 416(sp) # 8-byte Folded Reload - j .LBB0_301 -.LBB0_293: # %vector.memcheck1053 - # in Loop: Header=BB0_242 Depth=2 + j .LBB0_300 +.LBB0_292: # %vector.memcheck1053 + # in Loop: Header=BB0_241 Depth=2 ld t1, 416(sp) # 8-byte Folded Reload sub a0, t0, t1 - bgeu a0, a4, .LBB0_298 -# %bb.294: # in Loop: Header=BB0_242 Depth=2 + bgeu a0, a4, .LBB0_297 +# %bb.293: # in Loop: Header=BB0_241 Depth=2 li a0, 0 - j .LBB0_301 -.LBB0_295: # %if.then23.i - # in Loop: Header=BB0_242 Depth=2 + j .LBB0_300 +.LBB0_294: # %if.then23.i + # in Loop: Header=BB0_241 Depth=2 sd a4, 96(sp) # 8-byte Folded Spill mv s8, s2 sd s0, 104(sp) # 8-byte Folded Spill @@ -2542,9 +2531,9 @@ lw a2, 524(sp) lw a1, 520(sp) ld a0, 512(sp) - bltu a1, a2, .LBB0_297 -# %bb.296: # %if.then.i.i - # in Loop: Header=BB0_242 Depth=2 + bltu a1, a2, .LBB0_296 +# %bb.295: # %if.then.i.i + # in Loop: Header=BB0_241 Depth=2 addi a2, a2, 5 sw a2, 524(sp) slli a1, a2, 32 @@ -2552,8 +2541,8 @@ call xrealloc lw a1, 520(sp) sd a0, 512(sp) -.LBB0_297: # %add_col_elt.exit.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_296: # %add_col_elt.exit.i + # in Loop: Header=BB0_241 Depth=2 li t2, -1 mv s2, s8 ld a4, 96(sp) # 8-byte Folded Reload @@ -2570,9 +2559,9 @@ li t5, 1 ld s0, 104(sp) # 8-byte Folded Reload ld s10, 264(sp) # 8-byte Folded Reload - j .LBB0_417 -.LBB0_298: # %vector.ph1060 - # in Loop: Header=BB0_242 Depth=2 + j .LBB0_416 +.LBB0_297: # %vector.ph1060 + # in Loop: Header=BB0_241 Depth=2 ld a0, 312(sp) # 8-byte Folded Reload srli a0, a0, 1 neg a0, a0 @@ -2583,77 +2572,77 @@ mv a2, t0 mv a3, t1 ld a5, 224(sp) # 8-byte Folded Reload -.LBB0_299: # %vector.body1065 +.LBB0_298: # %vector.body1065 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 vs2r.v v8, (a3) vs2r.v v8, (a2) add a3, a3, a4 sub a1, a1, a5 add a2, a2, a4 - bnez a1, .LBB0_299 -# %bb.300: # %middle.block1057 - # in Loop: Header=BB0_242 Depth=2 - beq s4, a0, .LBB0_303 -.LBB0_301: # %for.body85.i.preheader - # in Loop: Header=BB0_242 Depth=2 + bnez a1, .LBB0_298 +# %bb.299: # %middle.block1057 + # in Loop: Header=BB0_241 Depth=2 + beq s4, a0, .LBB0_302 +.LBB0_300: # %for.body85.i.preheader + # in Loop: Header=BB0_241 Depth=2 slli a1, a0, 2 add a0, t0, a1 add a1, t1, a1 add a2, t0, s6 addi a2, a2, 4 -.LBB0_302: # %for.body85.i +.LBB0_301: # %for.body85.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 sw s10, 0(a1) sw s10, 0(a0) addi a0, a0, 4 addi a1, a1, 4 - bne a0, a2, .LBB0_302 -.LBB0_303: # %for.end93.i - # in Loop: Header=BB0_242 Depth=2 + bne a0, a2, .LBB0_301 +.LBB0_302: # %for.end93.i + # in Loop: Header=BB0_241 Depth=2 ld a0, 448(sp) # 8-byte Folded Reload slli a4, a0, 2 add t1, t1, a4 sw s9, 0(t1) mv a0, s1 - bltu s1, s7, .LBB0_305 -# %bb.304: # %for.end93.i - # in Loop: Header=BB0_242 Depth=2 + bltu s1, s7, .LBB0_304 +# %bb.303: # %for.end93.i + # in Loop: Header=BB0_241 Depth=2 mv a0, s7 -.LBB0_305: # %for.end93.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_304: # %for.end93.i + # in Loop: Header=BB0_241 Depth=2 li s9, 0 - beqz s1, .LBB0_310 -# %bb.306: # %land.rhs107.preheader.i - # in Loop: Header=BB0_242 Depth=2 + beqz s1, .LBB0_309 +# %bb.305: # %land.rhs107.preheader.i + # in Loop: Header=BB0_241 Depth=2 slli a1, a0, 32 srli a1, a1, 32 -.LBB0_307: # %land.rhs107.i +.LBB0_306: # %land.rhs107.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 lbu a2, 0(s11) lbu a3, 0(s8) - bne a2, a3, .LBB0_310 -# %bb.308: # %for.inc118.i - # in Loop: Header=BB0_307 Depth=3 + bne a2, a3, .LBB0_309 +# %bb.307: # %for.inc118.i + # in Loop: Header=BB0_306 Depth=3 addiw s9, s9, 1 addi a1, a1, -1 addi s8, s8, 1 addi s11, s11, 1 - bnez a1, .LBB0_307 -# %bb.309: # in Loop: Header=BB0_242 Depth=2 + bnez a1, .LBB0_306 +# %bb.308: # in Loop: Header=BB0_241 Depth=2 mv s9, a0 -.LBB0_310: # %for.end120.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_309: # %for.end120.i + # in Loop: Header=BB0_241 Depth=2 sd t0, 464(sp) # 8-byte Folded Spill - bne s9, s7, .LBB0_314 -# %bb.311: # %if.then123.i - # in Loop: Header=BB0_242 Depth=2 - ld s3, 80(sp) # 8-byte Folded Reload + bne s9, s7, .LBB0_313 +# %bb.310: # %if.then123.i + # in Loop: Header=BB0_241 Depth=2 + ld s3, 72(sp) # 8-byte Folded Reload addi s1, s3, 1 addi s2, a7, 1 add s3, s3, s7 @@ -2669,9 +2658,9 @@ lw a1, 520(sp) ld a0, 512(sp) ld s11, 248(sp) # 8-byte Folded Reload - bltu a1, a2, .LBB0_313 -# %bb.312: # %if.then.i415.i - # in Loop: Header=BB0_242 Depth=2 + bltu a1, a2, .LBB0_312 +# %bb.311: # %if.then.i415.i + # in Loop: Header=BB0_241 Depth=2 addi a2, a2, 5 sw a2, 524(sp) slli a1, a2, 32 @@ -2679,13 +2668,13 @@ call xrealloc lw a1, 520(sp) sd a0, 512(sp) -.LBB0_313: # %add_col_elt.exit424.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_312: # %add_col_elt.exit424.i + # in Loop: Header=BB0_241 Depth=2 ld s10, 264(sp) # 8-byte Folded Reload - ld s9, 168(sp) # 8-byte Folded Reload + ld s9, 200(sp) # 8-byte Folded Reload li s6, 2 ld s5, 440(sp) # 8-byte Folded Reload - ld s2, 88(sp) # 8-byte Folded Reload + ld s2, 80(sp) # 8-byte Folded Reload ld a3, 416(sp) # 8-byte Folded Reload addi a2, a1, 1 sw a2, 520(sp) @@ -2701,9 +2690,9 @@ li t5, 1 li t2, -1 ld s0, 104(sp) # 8-byte Folded Reload - j .LBB0_416 -.LBB0_314: # %if.end129.i - # in Loop: Header=BB0_242 Depth=2 + j .LBB0_415 +.LBB0_313: # %if.end129.i + # in Loop: Header=BB0_241 Depth=2 sd a4, 400(sp) # 8-byte Folded Spill sd t1, 384(sp) # 8-byte Folded Spill mv a0, s0 @@ -2711,44 +2700,44 @@ mv s8, a0 mv a0, s0 call xmalloc - ld a1, 312(sp) # 8-byte Folded Reload - srli a1, a1, 1 + ld a3, 312(sp) # 8-byte Folded Reload + srli a1, a3, 1 li a2, 8 mv s11, a0 - bltu a2, a1, .LBB0_316 -# %bb.315: # %if.end129.i - # in Loop: Header=BB0_242 Depth=2 + bltu a2, a1, .LBB0_315 +# %bb.314: # %if.end129.i + # in Loop: Header=BB0_241 Depth=2 li a1, 8 -.LBB0_316: # %if.end129.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_315: # %if.end129.i + # in Loop: Header=BB0_241 Depth=2 ld a4, 56(sp) # 8-byte Folded Reload li a6, -1 ld a7, 456(sp) # 8-byte Folded Reload - bltu s4, a1, .LBB0_318 -# %bb.317: # %vector.memcheck1038 - # in Loop: Header=BB0_242 Depth=2 + bltu s4, a1, .LBB0_317 +# %bb.316: # %vector.memcheck1038 + # in Loop: Header=BB0_241 Depth=2 sub a0, s11, s8 - bgeu a0, a4, .LBB0_436 -.LBB0_318: # in Loop: Header=BB0_242 Depth=2 + bgeu a0, a4, .LBB0_435 +.LBB0_317: # in Loop: Header=BB0_241 Depth=2 li a0, 0 -.LBB0_319: # %for.body144.i.preheader - # in Loop: Header=BB0_242 Depth=2 +.LBB0_318: # %for.body144.i.preheader + # in Loop: Header=BB0_241 Depth=2 slli a1, a0, 2 add a0, s11, a1 add a1, s8, a1 add a2, s11, s6 addi a2, a2, 4 -.LBB0_320: # %for.body144.i +.LBB0_319: # %for.body144.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 sw a6, 0(a1) sw a6, 0(a0) addi a0, a0, 4 addi a1, a1, 4 - bne a0, a2, .LBB0_320 -.LBB0_321: # %for.end151.i - # in Loop: Header=BB0_242 Depth=2 + bne a0, a2, .LBB0_319 +.LBB0_320: # %for.end151.i + # in Loop: Header=BB0_241 Depth=2 ld s4, 448(sp) # 8-byte Folded Reload subw a0, a7, s4 sd a0, 456(sp) # 8-byte Folded Spill @@ -2774,9 +2763,9 @@ ld a2, 304(sp) # 8-byte Folded Reload sd a0, 272(sp) # 8-byte Folded Spill sd s6, 256(sp) # 8-byte Folded Spill - beqz s4, .LBB0_401 -# %bb.322: # %for.body175.lr.ph.i - # in Loop: Header=BB0_242 Depth=2 + beqz s4, .LBB0_400 +# %bb.321: # %for.body175.lr.ph.i + # in Loop: Header=BB0_241 Depth=2 mv s9, s6 li a1, 2 ld a3, 368(sp) # 8-byte Folded Reload @@ -2784,42 +2773,43 @@ li t6, 1 li s6, 2 mv t5, a3 - bltu a1, a3, .LBB0_324 -# %bb.323: # %for.body175.lr.ph.i - # in Loop: Header=BB0_242 Depth=2 + bltu a1, a3, .LBB0_323 +# %bb.322: # %for.body175.lr.ph.i + # in Loop: Header=BB0_241 Depth=2 li a2, 2 -.LBB0_324: # %for.body175.lr.ph.i - # in Loop: Header=BB0_242 Depth=2 - ld a4, 312(sp) # 8-byte Folded Reload - srli a3, a4, 1 +.LBB0_323: # %for.body175.lr.ph.i + # in Loop: Header=BB0_241 Depth=2 + ld a3, 312(sp) # 8-byte Folded Reload + srli a3, a3, 1 li a1, 8 - addi a5, sp, 576 - vl2r.v v10, (a5) # Unknown-size Folded Reload + addi a4, sp, 576 + vl2r.v v10, (a4) # Unknown-size Folded Reload ld a7, 56(sp) # 8-byte Folded Reload li t1, -1 ld t2, 464(sp) # 8-byte Folded Reload ld t4, 400(sp) # 8-byte Folded Reload mv s4, s8 mv s8, s9 - bltu a1, a3, .LBB0_326 -# %bb.325: # %for.body175.lr.ph.i - # in Loop: Header=BB0_242 Depth=2 + bltu a1, a3, .LBB0_325 +# %bb.324: # %for.body175.lr.ph.i + # in Loop: Header=BB0_241 Depth=2 li a3, 8 -.LBB0_326: # %for.body175.lr.ph.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_325: # %for.body175.lr.ph.i + # in Loop: Header=BB0_241 Depth=2 slli a2, a2, 32 srli t3, a2, 32 addi a2, t3, -1 li a1, 1 - bltu a2, a3, .LBB0_331 -# %bb.327: # %vector.memcheck - # in Loop: Header=BB0_242 Depth=2 + bltu a2, a3, .LBB0_330 +# %bb.326: # %vector.memcheck + # in Loop: Header=BB0_241 Depth=2 ld a3, 304(sp) # 8-byte Folded Reload sub a3, a3, s0 - bltu a3, a7, .LBB0_331 -# %bb.328: # %vector.ph1029 - # in Loop: Header=BB0_242 Depth=2 - srli a1, a4, 1 + bltu a3, a7, .LBB0_330 +# %bb.327: # %vector.ph1029 + # in Loop: Header=BB0_241 Depth=2 + ld a1, 312(sp) # 8-byte Folded Reload + srli a1, a1, 1 neg a3, a1 and a3, a2, a3 addi a1, a3, 1 @@ -2830,38 +2820,38 @@ addi a5, a5, 4 mv a6, a3 ld t0, 224(sp) # 8-byte Folded Reload -.LBB0_329: # %vector.body1035 +.LBB0_328: # %vector.body1035 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 vs2r.v v8, (a4) vs2r.v v10, (a5) add a4, a4, a7 sub a6, a6, t0 add a5, a5, a7 - bnez a6, .LBB0_329 -# %bb.330: # %middle.block1026 - # in Loop: Header=BB0_242 Depth=2 - beq a2, a3, .LBB0_333 -.LBB0_331: # %for.body175.i.preheader - # in Loop: Header=BB0_242 Depth=2 + bnez a6, .LBB0_328 +# %bb.329: # %middle.block1026 + # in Loop: Header=BB0_241 Depth=2 + beq a2, a3, .LBB0_332 +.LBB0_330: # %for.body175.i.preheader + # in Loop: Header=BB0_241 Depth=2 slli a2, a1, 2 ld a3, 304(sp) # 8-byte Folded Reload add a1, a3, a2 add a2, s0, a2 slli t3, t3, 2 add a3, a3, t3 -.LBB0_332: # %for.body175.i +.LBB0_331: # %for.body175.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 sw s10, 0(a2) sw t1, 0(a1) addi a1, a1, 4 addi a2, a2, 4 - bne a1, a3, .LBB0_332 -.LBB0_333: # %for.cond196.preheader.lr.ph.i - # in Loop: Header=BB0_242 Depth=2 + bne a1, a3, .LBB0_331 +.LBB0_332: # %for.cond196.preheader.lr.ph.i + # in Loop: Header=BB0_241 Depth=2 ld a1, 384(sp) # 8-byte Folded Reload lw a1, 0(a1) sw a1, 0(s0) @@ -2874,23 +2864,23 @@ sw a5, 0(s8) ld a2, 416(sp) # 8-byte Folded Reload addi a0, a2, -4 - sd a0, 200(sp) # 8-byte Folded Spill - addi a0, a2, 4 sd a0, 192(sp) # 8-byte Folded Spill + addi a0, a2, 4 + sd a0, 184(sp) # 8-byte Folded Spill add a0, t2, t4 sd a0, 240(sp) # 8-byte Folded Spill ld a4, 408(sp) # 8-byte Folded Reload addi a0, a4, -4 - sd a0, 184(sp) # 8-byte Folded Spill - addi a0, a4, 4 sd a0, 176(sp) # 8-byte Folded Spill + addi a0, a4, 4 + sd a0, 168(sp) # 8-byte Folded Spill add a0, s11, t4 sd a0, 232(sp) # 8-byte Folded Spill addiw a7, a5, 2 ld a1, 112(sp) # 8-byte Folded Reload add s5, a1, s5 ld a0, 120(sp) # 8-byte Folded Reload - ld a3, 80(sp) # 8-byte Folded Reload + ld a3, 72(sp) # 8-byte Folded Reload add a0, a3, a0 not a0, a0 add a0, a0, s5 @@ -2921,12 +2911,12 @@ li t4, -1 li a0, -1 sd a0, 280(sp) # 8-byte Folded Spill - j .LBB0_336 -.LBB0_334: # in Loop: Header=BB0_336 Depth=3 + j .LBB0_335 +.LBB0_333: # in Loop: Header=BB0_335 Depth=3 mv a5, t3 ld s5, 280(sp) # 8-byte Folded Reload -.LBB0_335: # %for.end527.i - # in Loop: Header=BB0_336 Depth=3 +.LBB0_334: # %for.end527.i + # in Loop: Header=BB0_335 Depth=3 addiw s8, s8, 1 ld a0, 384(sp) # 8-byte Folded Reload addi a0, a0, -1 @@ -2954,28 +2944,28 @@ addi a0, a0, -4 sd a0, 336(sp) # 8-byte Folded Spill mv t4, s9 - bltu a5, s8, .LBB0_402 -.LBB0_336: # %for.cond196.preheader.i + bltu a5, s8, .LBB0_401 +.LBB0_335: # %for.cond196.preheader.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_341 Depth 4 - # Child Loop BB0_353 Depth 5 - # Child Loop BB0_358 Depth 4 - # Child Loop BB0_363 Depth 4 - # Child Loop BB0_369 Depth 4 - # Child Loop BB0_385 Depth 5 - # Child Loop BB0_392 Depth 4 - # Child Loop BB0_397 Depth 4 + # Child Loop BB0_340 Depth 4 + # Child Loop BB0_352 Depth 5 + # Child Loop BB0_357 Depth 4 + # Child Loop BB0_362 Depth 4 + # Child Loop BB0_368 Depth 4 + # Child Loop BB0_384 Depth 5 + # Child Loop BB0_391 Depth 4 + # Child Loop BB0_396 Depth 4 ld a2, 448(sp) # 8-byte Folded Reload subw a3, a2, s8 addw a6, s8, a2 slli a0, s8, 32 sd t5, 368(sp) # 8-byte Folded Spill sd t0, 400(sp) # 8-byte Folded Spill - bge t5, t0, .LBB0_338 -# %bb.337: # %for.end299.thread.i - # in Loop: Header=BB0_336 Depth=3 + bge t5, t0, .LBB0_337 +# %bb.336: # %for.end299.thread.i + # in Loop: Header=BB0_335 Depth=3 ld a4, 240(sp) # 8-byte Folded Reload lw a4, 0(a4) srli t1, a0, 32 @@ -2986,91 +2976,91 @@ add t0, a4, t0 sw a2, 0(t0) ld s5, 304(sp) # 8-byte Folded Reload - bnez s8, .LBB0_361 - j .LBB0_388 -.LBB0_338: # %for.body199.lr.ph.i - # in Loop: Header=BB0_336 Depth=3 + bnez s8, .LBB0_360 + j .LBB0_387 +.LBB0_337: # %for.body199.lr.ph.i + # in Loop: Header=BB0_335 Depth=3 slli t1, a6, 2 - ld a2, 200(sp) # 8-byte Folded Reload + ld a2, 192(sp) # 8-byte Folded Reload add t1, a2, t1 slli t3, a3, 2 - ld a2, 192(sp) # 8-byte Folded Reload + ld a2, 184(sp) # 8-byte Folded Reload add t3, a2, t3 ld s4, 360(sp) # 8-byte Folded Reload ld s5, 384(sp) # 8-byte Folded Reload - j .LBB0_341 -.LBB0_339: # in Loop: Header=BB0_341 Depth=4 + j .LBB0_340 +.LBB0_338: # in Loop: Header=BB0_340 Depth=4 mv ra, a4 -.LBB0_340: # %while.end.i403 - # in Loop: Header=BB0_341 Depth=4 +.LBB0_339: # %while.end.i403 + # in Loop: Header=BB0_340 Depth=4 ld a2, 464(sp) # 8-byte Folded Reload add s9, a2, s9 sw ra, 0(s9) addiw a4, s5, 1 addi s5, s5, 1 addi s4, s4, 1 - beq a7, a4, .LBB0_356 -.LBB0_341: # %for.body199.i + beq a7, a4, .LBB0_355 +.LBB0_340: # %for.body199.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 - # Parent Loop BB0_336 Depth=3 + # Parent Loop BB0_241 Depth=2 + # Parent Loop BB0_335 Depth=3 # => This Loop Header: Depth=4 - # Child Loop BB0_353 Depth 5 + # Child Loop BB0_352 Depth 5 slli s9, s5, 2 - bne s5, a3, .LBB0_343 -# %bb.342: # %if.then205.i - # in Loop: Header=BB0_341 Depth=4 + bne s5, a3, .LBB0_342 +# %bb.341: # %if.then205.i + # in Loop: Header=BB0_340 Depth=4 lw ra, 0(t3) - j .LBB0_351 -.LBB0_343: # %if.else209.i - # in Loop: Header=BB0_341 Depth=4 - bne s5, a6, .LBB0_345 -# %bb.344: # %if.then214.i - # in Loop: Header=BB0_341 Depth=4 + j .LBB0_350 +.LBB0_342: # %if.else209.i + # in Loop: Header=BB0_340 Depth=4 + bne s5, a6, .LBB0_344 +# %bb.343: # %if.then214.i + # in Loop: Header=BB0_340 Depth=4 lw ra, 0(t1) addiw ra, ra, -1 - j .LBB0_351 -.LBB0_345: # %if.else219.i - # in Loop: Header=BB0_341 Depth=4 + j .LBB0_350 +.LBB0_344: # %if.else219.i + # in Loop: Header=BB0_340 Depth=4 ld t0, 416(sp) # 8-byte Folded Reload add t0, t0, s9 lw a4, 0(t0) lw t2, 4(t0) - bge t2, a4, .LBB0_347 -# %bb.346: # %if.else219.if.else240_crit_edge.i - # in Loop: Header=BB0_341 Depth=4 + bge t2, a4, .LBB0_346 +# %bb.345: # %if.else219.if.else240_crit_edge.i + # in Loop: Header=BB0_340 Depth=4 lw t0, -4(t0) - j .LBB0_348 -.LBB0_347: # %land.lhs.true227.i - # in Loop: Header=BB0_341 Depth=4 + j .LBB0_347 +.LBB0_346: # %land.lhs.true227.i + # in Loop: Header=BB0_340 Depth=4 lw t0, -4(t0) addiw ra, a4, -1 - bge t0, ra, .LBB0_351 -.LBB0_348: # %if.else240.i - # in Loop: Header=BB0_341 Depth=4 + bge t0, ra, .LBB0_350 +.LBB0_347: # %if.else240.i + # in Loop: Header=BB0_340 Depth=4 slt t5, t0, t2 slt a4, t0, a4 and a4, t5, a4 - beqz a4, .LBB0_350 -# %bb.349: # in Loop: Header=BB0_341 Depth=4 + beqz a4, .LBB0_349 +# %bb.348: # in Loop: Header=BB0_340 Depth=4 addiw t2, t0, -1 -.LBB0_350: # %if.else240.i - # in Loop: Header=BB0_341 Depth=4 +.LBB0_349: # %if.else240.i + # in Loop: Header=BB0_340 Depth=4 mv ra, t2 -.LBB0_351: # %if.end271.i - # in Loop: Header=BB0_341 Depth=4 - blez ra, .LBB0_340 -# %bb.352: # %if.end271.i - # in Loop: Header=BB0_341 Depth=4 +.LBB0_350: # %if.end271.i + # in Loop: Header=BB0_340 Depth=4 + blez ra, .LBB0_339 +# %bb.351: # %if.end271.i + # in Loop: Header=BB0_340 Depth=4 ld a4, 456(sp) # 8-byte Folded Reload add a4, a4, s5 addw a4, a4, ra - blez a4, .LBB0_340 -.LBB0_353: # %land.rhs280.i + blez a4, .LBB0_339 +.LBB0_352: # %land.rhs280.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 - # Parent Loop BB0_336 Depth=3 - # Parent Loop BB0_341 Depth=4 + # Parent Loop BB0_241 Depth=2 + # Parent Loop BB0_335 Depth=3 + # Parent Loop BB0_340 Depth=4 # => This Inner Loop Header: Depth=5 mv a4, ra addw t0, s4, ra @@ -3078,17 +3068,17 @@ lbu t2, 0(t2) add t5, s3, t0 lbu t5, 0(t5) - bne t2, t5, .LBB0_339 -# %bb.354: # %while.body292.i - # in Loop: Header=BB0_353 Depth=5 + bne t2, t5, .LBB0_338 +# %bb.353: # %while.body292.i + # in Loop: Header=BB0_352 Depth=5 addiw ra, a4, -1 - blt t0, s6, .LBB0_340 -# %bb.355: # %while.body292.i - # in Loop: Header=BB0_353 Depth=5 - blt t6, a4, .LBB0_353 - j .LBB0_340 -.LBB0_356: # %for.end299.i - # in Loop: Header=BB0_336 Depth=3 + blt t0, s6, .LBB0_339 +# %bb.354: # %while.body292.i + # in Loop: Header=BB0_352 Depth=5 + blt t6, a4, .LBB0_352 + j .LBB0_339 +.LBB0_355: # %for.end299.i + # in Loop: Header=BB0_335 Depth=3 ld a2, 240(sp) # 8-byte Folded Reload lw a4, 0(a2) srli t1, a0, 32 @@ -3104,158 +3094,158 @@ ld t5, 376(sp) # 8-byte Folded Reload ld t6, 400(sp) # 8-byte Folded Reload ld s5, 304(sp) # 8-byte Folded Reload - j .LBB0_358 -.LBB0_357: # %for.inc333.i - # in Loop: Header=BB0_358 Depth=4 + j .LBB0_357 +.LBB0_356: # %for.inc333.i + # in Loop: Header=BB0_357 Depth=4 addi t6, t6, 1 addiw t5, t5, -1 addi s4, s4, 4 addi t2, t2, 4 - beqz t5, .LBB0_360 -.LBB0_358: # %for.body311.i + beqz t5, .LBB0_359 +.LBB0_357: # %for.body311.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 - # Parent Loop BB0_336 Depth=3 + # Parent Loop BB0_241 Depth=2 + # Parent Loop BB0_335 Depth=3 # => This Inner Loop Header: Depth=4 lw a4, 0(t2) sw a4, 0(s4) sw s10, 0(t2) lw a4, 0(s4) lw t0, 0(a0) - bge a4, t0, .LBB0_357 -# %bb.359: # %if.then325.i - # in Loop: Header=BB0_358 Depth=4 + bge a4, t0, .LBB0_356 +# %bb.358: # %if.then325.i + # in Loop: Header=BB0_357 Depth=4 sw a4, 0(a0) sw t6, 0(t3) - j .LBB0_357 -.LBB0_360: # in Loop: Header=BB0_336 Depth=3 + j .LBB0_356 +.LBB0_359: # in Loop: Header=BB0_335 Depth=3 li t6, 1 - beqz s8, .LBB0_388 -.LBB0_361: # %for.body339.lr.ph.i - # in Loop: Header=BB0_336 Depth=3 + beqz s8, .LBB0_387 +.LBB0_360: # %for.body339.lr.ph.i + # in Loop: Header=BB0_335 Depth=3 lw a0, 0(a0) li s9, 0 subw t2, s8, a5 mv s4, s5 mv t5, t1 - j .LBB0_363 -.LBB0_362: # %for.inc359.i - # in Loop: Header=BB0_363 Depth=4 + j .LBB0_362 +.LBB0_361: # %for.inc359.i + # in Loop: Header=BB0_362 Depth=4 addi t5, t5, -1 addiw s9, s9, 1 addi s4, s4, 4 - beqz t5, .LBB0_388 -.LBB0_363: # %for.body339.i + beqz t5, .LBB0_387 +.LBB0_362: # %for.body339.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 - # Parent Loop BB0_336 Depth=3 + # Parent Loop BB0_241 Depth=2 + # Parent Loop BB0_335 Depth=3 # => This Inner Loop Header: Depth=4 lw a4, 0(s4) - blt a4, a0, .LBB0_362 -# %bb.364: # %land.lhs.true346.i - # in Loop: Header=BB0_363 Depth=4 + blt a4, a0, .LBB0_361 +# %bb.363: # %land.lhs.true346.i + # in Loop: Header=BB0_362 Depth=4 addw t3, s8, s9 - bltu t3, a5, .LBB0_366 -# %bb.365: # %land.lhs.true346.i - # in Loop: Header=BB0_363 Depth=4 + bltu t3, a5, .LBB0_365 +# %bb.364: # %land.lhs.true346.i + # in Loop: Header=BB0_362 Depth=4 slti a4, t4, 0 addw t0, t2, s9 seqz t0, t0 and a4, t0, a4 - beqz a4, .LBB0_362 -.LBB0_366: # %land.lhs.true346.i.for.end361.i.loopexit_crit_edge - # in Loop: Header=BB0_336 Depth=3 + beqz a4, .LBB0_361 +.LBB0_365: # %land.lhs.true346.i.for.end361.i.loopexit_crit_edge + # in Loop: Header=BB0_335 Depth=3 sd s8, 280(sp) # 8-byte Folded Spill slli a5, t1, 2 ld t5, 368(sp) # 8-byte Folded Reload ld t0, 400(sp) # 8-byte Folded Reload - blt t5, t0, .LBB0_389 -.LBB0_367: # %for.body367.lr.ph.i - # in Loop: Header=BB0_336 Depth=3 + blt t5, t0, .LBB0_388 +.LBB0_366: # %for.body367.lr.ph.i + # in Loop: Header=BB0_335 Depth=3 slli t4, a6, 2 - ld a0, 184(sp) # 8-byte Folded Reload + ld a0, 176(sp) # 8-byte Folded Reload add t4, a0, t4 slli s5, a3, 2 - ld a0, 176(sp) # 8-byte Folded Reload + ld a0, 168(sp) # 8-byte Folded Reload add s5, a0, s5 ld a0, 384(sp) # 8-byte Folded Reload ld ra, 328(sp) # 8-byte Folded Reload - j .LBB0_369 -.LBB0_368: # %if.end462.i - # in Loop: Header=BB0_369 Depth=4 + j .LBB0_368 +.LBB0_367: # %if.end462.i + # in Loop: Header=BB0_368 Depth=4 add t1, s11, t1 sw s4, 0(t1) addiw ra, ra, 1 addi a0, a0, 1 li t6, 1 li s6, 2 - beq ra, a1, .LBB0_390 -.LBB0_369: # %for.body367.i + beq ra, a1, .LBB0_389 +.LBB0_368: # %for.body367.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 - # Parent Loop BB0_336 Depth=3 + # Parent Loop BB0_241 Depth=2 + # Parent Loop BB0_335 Depth=3 # => This Loop Header: Depth=4 - # Child Loop BB0_385 Depth 5 + # Child Loop BB0_384 Depth 5 slli t1, a0, 2 - bne a0, a3, .LBB0_371 -# %bb.370: # %if.then372.i - # in Loop: Header=BB0_369 Depth=4 + bne a0, a3, .LBB0_370 +# %bb.369: # %if.then372.i + # in Loop: Header=BB0_368 Depth=4 lw s4, 0(s5) addiw s4, s4, 1 - j .LBB0_381 -.LBB0_371: # %if.else377.i - # in Loop: Header=BB0_369 Depth=4 - bne a0, a6, .LBB0_373 -# %bb.372: # %if.then381.i - # in Loop: Header=BB0_369 Depth=4 + j .LBB0_380 +.LBB0_370: # %if.else377.i + # in Loop: Header=BB0_368 Depth=4 + bne a0, a6, .LBB0_372 +# %bb.371: # %if.then381.i + # in Loop: Header=BB0_368 Depth=4 lw s4, 0(t4) - j .LBB0_381 -.LBB0_373: # %if.else385.i - # in Loop: Header=BB0_369 Depth=4 + j .LBB0_380 +.LBB0_372: # %if.else385.i + # in Loop: Header=BB0_368 Depth=4 ld a4, 408(sp) # 8-byte Folded Reload add a4, a4, t1 lw t2, 0(a4) lw t5, 4(a4) - bge t2, t5, .LBB0_375 -# %bb.374: # %if.else385.if.else406_crit_edge.i - # in Loop: Header=BB0_369 Depth=4 + bge t2, t5, .LBB0_374 +# %bb.373: # %if.else385.if.else406_crit_edge.i + # in Loop: Header=BB0_368 Depth=4 lw a4, -4(a4) - j .LBB0_376 -.LBB0_375: # %land.lhs.true393.i - # in Loop: Header=BB0_369 Depth=4 + j .LBB0_375 +.LBB0_374: # %land.lhs.true393.i + # in Loop: Header=BB0_368 Depth=4 lw a4, -4(a4) addiw s4, t2, 1 - bge s4, a4, .LBB0_381 -.LBB0_376: # %if.else406.i - # in Loop: Header=BB0_369 Depth=4 + bge s4, a4, .LBB0_380 +.LBB0_375: # %if.else406.i + # in Loop: Header=BB0_368 Depth=4 addiw t0, t5, 1 - blt a4, t0, .LBB0_378 -# %bb.377: # %if.else406.i - # in Loop: Header=BB0_369 Depth=4 + blt a4, t0, .LBB0_377 +# %bb.376: # %if.else406.i + # in Loop: Header=BB0_368 Depth=4 mv t0, a4 -.LBB0_378: # %if.else406.i - # in Loop: Header=BB0_369 Depth=4 - blt t5, t2, .LBB0_380 -# %bb.379: # %if.else406.i - # in Loop: Header=BB0_369 Depth=4 +.LBB0_377: # %if.else406.i + # in Loop: Header=BB0_368 Depth=4 + blt t5, t2, .LBB0_379 +# %bb.378: # %if.else406.i + # in Loop: Header=BB0_368 Depth=4 mv a4, t0 -.LBB0_380: # %if.else406.i - # in Loop: Header=BB0_369 Depth=4 +.LBB0_379: # %if.else406.i + # in Loop: Header=BB0_368 Depth=4 mv s4, a4 -.LBB0_381: # %if.end436.i - # in Loop: Header=BB0_369 Depth=4 - bltz s4, .LBB0_368 +.LBB0_380: # %if.end436.i + # in Loop: Header=BB0_368 Depth=4 + bltz s4, .LBB0_367 +# %bb.381: # %while.cond442.preheader.i + # in Loop: Header=BB0_368 Depth=4 + bgeu s4, s7, .LBB0_367 # %bb.382: # %while.cond442.preheader.i - # in Loop: Header=BB0_369 Depth=4 - bgeu s4, s7, .LBB0_368 -# %bb.383: # %while.cond442.preheader.i - # in Loop: Header=BB0_369 Depth=4 + # in Loop: Header=BB0_368 Depth=4 ld a2, 448(sp) # 8-byte Folded Reload subw a4, a0, a2 addw a4, a4, s4 - bgeu a4, s1, .LBB0_368 -# %bb.384: # %land.rhs448.preheader.i - # in Loop: Header=BB0_369 Depth=4 + bgeu a4, s1, .LBB0_367 +# %bb.383: # %land.rhs448.preheader.i + # in Loop: Header=BB0_368 Depth=4 li s6, 0 add a4, s4, ra slli t0, s4, 32 @@ -3268,37 +3258,37 @@ addi t0, a4, 1 ld a2, 424(sp) # 8-byte Folded Reload add a4, a2, a4 -.LBB0_385: # %land.rhs448.i +.LBB0_384: # %land.rhs448.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 - # Parent Loop BB0_336 Depth=3 - # Parent Loop BB0_369 Depth=4 + # Parent Loop BB0_241 Depth=2 + # Parent Loop BB0_335 Depth=3 + # Parent Loop BB0_368 Depth=4 # => This Inner Loop Header: Depth=5 add t2, t6, s6 lbu t2, 0(t2) add a2, a4, s6 lbu a2, 0(a2) - bne t2, a2, .LBB0_368 -# %bb.386: # %while.body458.i - # in Loop: Header=BB0_385 Depth=5 + bne t2, a2, .LBB0_367 +# %bb.385: # %while.body458.i + # in Loop: Header=BB0_384 Depth=5 add a2, t5, s6 addi s4, s4, 1 - bgeu a2, s7, .LBB0_368 -# %bb.387: # %while.body458.i - # in Loop: Header=BB0_385 Depth=5 + bgeu a2, s7, .LBB0_367 +# %bb.386: # %while.body458.i + # in Loop: Header=BB0_384 Depth=5 add a2, t0, s6 addi s6, s6, 1 - bltu a2, s1, .LBB0_385 - j .LBB0_368 -.LBB0_388: # in Loop: Header=BB0_336 Depth=3 + bltu a2, s1, .LBB0_384 + j .LBB0_367 +.LBB0_387: # in Loop: Header=BB0_335 Depth=3 mv t3, a5 mv s9, t4 slli a5, t1, 2 ld t5, 368(sp) # 8-byte Folded Reload ld t0, 400(sp) # 8-byte Folded Reload - bge t5, t0, .LBB0_367 -.LBB0_389: # %for.end467.thread.i - # in Loop: Header=BB0_336 Depth=3 + bge t5, t0, .LBB0_366 +.LBB0_388: # %for.end467.thread.i + # in Loop: Header=BB0_335 Depth=3 ld a0, 232(sp) # 8-byte Folded Reload lw a3, 0(a0) add a0, s5, a5 @@ -3307,9 +3297,9 @@ add a5, a3, a5 sw a2, 0(a5) ld t4, 320(sp) # 8-byte Folded Reload - j .LBB0_395 -.LBB0_390: # %for.end467.i - # in Loop: Header=BB0_336 Depth=3 + j .LBB0_394 +.LBB0_389: # %for.end467.i + # in Loop: Header=BB0_335 Depth=3 ld a0, 232(sp) # 8-byte Folded Reload lw a2, 0(a0) ld a0, 304(sp) # 8-byte Folded Reload @@ -3325,73 +3315,73 @@ ld t1, 376(sp) # 8-byte Folded Reload ld t2, 400(sp) # 8-byte Folded Reload li t0, -1 - j .LBB0_392 -.LBB0_391: # %for.inc498.i - # in Loop: Header=BB0_392 Depth=4 + j .LBB0_391 +.LBB0_390: # %for.inc498.i + # in Loop: Header=BB0_391 Depth=4 addi t2, t2, 1 addiw t1, t1, -1 addi a6, a6, 4 addi a3, a3, 4 - beqz t1, .LBB0_394 -.LBB0_392: # %for.body477.i + beqz t1, .LBB0_393 +.LBB0_391: # %for.body477.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 - # Parent Loop BB0_336 Depth=3 + # Parent Loop BB0_241 Depth=2 + # Parent Loop BB0_335 Depth=3 # => This Inner Loop Header: Depth=4 lw a2, 0(a3) sw a2, 0(a6) sw t0, 0(a3) lw a4, 0(a6) lw a2, 0(a0) - bge a2, a4, .LBB0_391 -# %bb.393: # %if.then490.i - # in Loop: Header=BB0_392 Depth=4 + bge a2, a4, .LBB0_390 +# %bb.392: # %if.then490.i + # in Loop: Header=BB0_391 Depth=4 sw a4, 0(a0) sw t2, 0(a5) - j .LBB0_391 -.LBB0_394: # in Loop: Header=BB0_336 Depth=3 + j .LBB0_390 +.LBB0_393: # in Loop: Header=BB0_335 Depth=3 ld t5, 368(sp) # 8-byte Folded Reload ld t0, 400(sp) # 8-byte Folded Reload -.LBB0_395: # %for.cond501.preheader.i - # in Loop: Header=BB0_336 Depth=3 +.LBB0_394: # %for.cond501.preheader.i + # in Loop: Header=BB0_335 Depth=3 li s5, 0 lw a0, 0(a0) addiw t0, t0, -1 addiw t5, t5, 1 subw a3, s8, t3 - j .LBB0_397 -.LBB0_396: # %for.inc525.i - # in Loop: Header=BB0_397 Depth=4 + j .LBB0_396 +.LBB0_395: # %for.inc525.i + # in Loop: Header=BB0_396 Depth=4 addiw s5, s5, 1 - bltu s8, s5, .LBB0_334 -.LBB0_397: # %for.body504.i + bltu s8, s5, .LBB0_333 +.LBB0_396: # %for.body504.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 - # Parent Loop BB0_336 Depth=3 + # Parent Loop BB0_241 Depth=2 + # Parent Loop BB0_335 Depth=3 # => This Inner Loop Header: Depth=4 slli a2, s5, 32 srli a2, a2, 30 add a2, s0, a2 lw a2, 0(a2) - blt a0, a2, .LBB0_396 -# %bb.398: # %land.lhs.true511.i - # in Loop: Header=BB0_397 Depth=4 + blt a0, a2, .LBB0_395 +# %bb.397: # %land.lhs.true511.i + # in Loop: Header=BB0_396 Depth=4 addw a5, s8, s5 - bltu a5, t3, .LBB0_400 -# %bb.399: # %land.lhs.true511.i - # in Loop: Header=BB0_397 Depth=4 + bltu a5, t3, .LBB0_399 +# %bb.398: # %land.lhs.true511.i + # in Loop: Header=BB0_396 Depth=4 slti a2, s9, 0 addw a4, a3, s5 seqz a4, a4 and a2, a4, a2 - beqz a2, .LBB0_396 -.LBB0_400: # %land.lhs.true511.i.for.end527.i_crit_edge - # in Loop: Header=BB0_336 Depth=3 + beqz a2, .LBB0_395 +.LBB0_399: # %land.lhs.true511.i.for.end527.i_crit_edge + # in Loop: Header=BB0_335 Depth=3 mv s9, s8 sd s5, 280(sp) # 8-byte Folded Spill - j .LBB0_335 -.LBB0_401: # %for.end183.thread.i - # in Loop: Header=BB0_242 Depth=2 + j .LBB0_334 +.LBB0_400: # %for.end183.thread.i + # in Loop: Header=BB0_241 Depth=2 ld a1, 384(sp) # 8-byte Folded Reload lw a1, 0(a1) sw a1, 0(s0) @@ -3403,12 +3393,12 @@ li s8, 1 li s9, -1 li s6, 2 -.LBB0_402: # %while.end531.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_401: # %while.end531.i + # in Loop: Header=BB0_241 Depth=2 ld a0, 448(sp) # 8-byte Folded Reload - bgeu a0, s8, .LBB0_404 -# %bb.403: # %if.then534.i - # in Loop: Header=BB0_242 Depth=2 + bgeu a0, s8, .LBB0_403 +# %bb.402: # %if.then534.i + # in Loop: Header=BB0_241 Depth=2 ld a0, 416(sp) # 8-byte Folded Reload call free ld a0, 464(sp) # 8-byte Folded Reload @@ -3428,10 +3418,10 @@ li t5, 1 ld s11, 248(sp) # 8-byte Folded Reload ld s10, 264(sp) # 8-byte Folded Reload - ld s9, 168(sp) # 8-byte Folded Reload - j .LBB0_415 -.LBB0_404: # %if.end535.i - # in Loop: Header=BB0_242 Depth=2 + ld s9, 200(sp) # 8-byte Folded Reload + j .LBB0_414 +.LBB0_403: # %if.end535.i + # in Loop: Header=BB0_241 Depth=2 slli a1, s5, 2 add a0, s0, a1 lw a2, 0(a0) @@ -3442,22 +3432,22 @@ subw a3, s7, a2 ld s10, 264(sp) # 8-byte Folded Reload ld a0, 416(sp) # 8-byte Folded Reload - blt a3, s1, .LBB0_406 -# %bb.405: # %if.end535.i - # in Loop: Header=BB0_242 Depth=2 + blt a3, s1, .LBB0_405 +# %bb.404: # %if.end535.i + # in Loop: Header=BB0_241 Depth=2 mv s1, a2 -.LBB0_406: # %if.end535.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_405: # %if.end535.i + # in Loop: Header=BB0_241 Depth=2 ld a2, 272(sp) # 8-byte Folded Reload add a1, a2, a1 lw s3, 0(a1) ld s4, 120(sp) # 8-byte Folded Reload ld a2, 112(sp) # 8-byte Folded Reload - ld a1, 80(sp) # 8-byte Folded Reload + ld a1, 72(sp) # 8-byte Folded Reload ld s8, 408(sp) # 8-byte Folded Reload - beqz s1, .LBB0_410 -# %bb.407: # %if.then580.i - # in Loop: Header=BB0_242 Depth=2 + beqz s1, .LBB0_409 +# %bb.406: # %if.then580.i + # in Loop: Header=BB0_241 Depth=2 ld a0, 256(sp) # 8-byte Folded Reload add a0, a0, a4 lw a0, 0(a0) @@ -3478,9 +3468,9 @@ lw a2, 524(sp) lw a1, 520(sp) ld a0, 512(sp) - bltu a1, a2, .LBB0_409 -# %bb.408: # %if.then.i433.i - # in Loop: Header=BB0_242 Depth=2 + bltu a1, a2, .LBB0_408 +# %bb.407: # %if.then.i433.i + # in Loop: Header=BB0_241 Depth=2 addi a2, a2, 5 sw a2, 524(sp) slli a1, a2, 32 @@ -3488,8 +3478,8 @@ call xrealloc lw a1, 520(sp) sd a0, 512(sp) -.LBB0_409: # %add_col_elt.exit442.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_408: # %add_col_elt.exit442.i + # in Loop: Header=BB0_241 Depth=2 addi a2, a1, 1 sw a2, 520(sp) slli a1, a1, 32 @@ -3501,13 +3491,13 @@ ld s4, 120(sp) # 8-byte Folded Reload ld a2, 112(sp) # 8-byte Folded Reload ld a0, 416(sp) # 8-byte Folded Reload - ld a1, 80(sp) # 8-byte Folded Reload + ld a1, 72(sp) # 8-byte Folded Reload ld s8, 408(sp) # 8-byte Folded Reload -.LBB0_410: # %if.end586.i - # in Loop: Header=BB0_242 Depth=2 - bgeu s1, s7, .LBB0_414 -# %bb.411: # %if.then589.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_409: # %if.end586.i + # in Loop: Header=BB0_241 Depth=2 + bgeu s1, s7, .LBB0_413 +# %bb.410: # %if.then589.i + # in Loop: Header=BB0_241 Depth=2 ld a0, 456(sp) # 8-byte Folded Reload add a0, a1, a0 add a0, a0, s3 @@ -3527,9 +3517,9 @@ lw a2, 524(sp) lw a1, 520(sp) ld a0, 512(sp) - bltu a1, a2, .LBB0_413 -# %bb.412: # %if.then.i451.i - # in Loop: Header=BB0_242 Depth=2 + bltu a1, a2, .LBB0_412 +# %bb.411: # %if.then.i451.i + # in Loop: Header=BB0_241 Depth=2 addi a2, a2, 5 sw a2, 524(sp) slli a1, a2, 32 @@ -3537,8 +3527,8 @@ call xrealloc lw a1, 520(sp) sd a0, 512(sp) -.LBB0_413: # %add_col_elt.exit460.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_412: # %add_col_elt.exit460.i + # in Loop: Header=BB0_241 Depth=2 addi a2, a1, 1 sw a2, 520(sp) slli a1, a1, 32 @@ -3546,8 +3536,8 @@ add a0, a0, a1 sd s2, 0(a0) ld a0, 416(sp) # 8-byte Folded Reload -.LBB0_414: # %if.end597.i - # in Loop: Header=BB0_242 Depth=2 +.LBB0_413: # %if.end597.i + # in Loop: Header=BB0_241 Depth=2 call free ld a0, 464(sp) # 8-byte Folded Reload call free @@ -3565,24 +3555,24 @@ call free add s8, s9, s5 ld s11, 248(sp) # 8-byte Folded Reload - ld s9, 168(sp) # 8-byte Folded Reload + ld s9, 200(sp) # 8-byte Folded Reload li t5, 1 -.LBB0_415: # %greedy.exitthread-pre-split - # in Loop: Header=BB0_242 Depth=2 +.LBB0_414: # %greedy.exitthread-pre-split + # in Loop: Header=BB0_241 Depth=2 li t2, -1 ld s5, 440(sp) # 8-byte Folded Reload ld s0, 104(sp) # 8-byte Folded Reload - ld s2, 88(sp) # 8-byte Folded Reload -.LBB0_416: # %greedy.exitthread-pre-split - # in Loop: Header=BB0_242 Depth=2 + ld s2, 80(sp) # 8-byte Folded Reload +.LBB0_415: # %greedy.exitthread-pre-split + # in Loop: Header=BB0_241 Depth=2 ld a4, 96(sp) # 8-byte Folded Reload -.LBB0_417: # %greedy.exitthread-pre-split - # in Loop: Header=BB0_242 Depth=2 +.LBB0_416: # %greedy.exitthread-pre-split + # in Loop: Header=BB0_241 Depth=2 lw a0, 520(sp) lw a3, 36(s11) - beqz a0, .LBB0_272 -.LBB0_418: # %land.lhs.true407 - # in Loop: Header=BB0_242 Depth=2 + beqz a0, .LBB0_271 +.LBB0_417: # %land.lhs.true407 + # in Loop: Header=BB0_241 Depth=2 ld a1, 208(sp) # 8-byte Folded Reload fld fa5, %pcrel_lo(.Lpcrel_hi6)(a1) ld a1, 296(sp) # 8-byte Folded Reload @@ -3591,28 +3581,28 @@ fcvt.d.w fa2, s7 fmadd.d fa5, fa2, fa3, fa5 flt.d a1, fa4, fa5 - bnez a1, .LBB0_420 -# %bb.419: # %land.lhs.true407 - # in Loop: Header=BB0_242 Depth=2 + bnez a1, .LBB0_419 +# %bb.418: # %land.lhs.true407 + # in Loop: Header=BB0_241 Depth=2 fmv.d fa5, fa4 -.LBB0_420: # %land.lhs.true407 - # in Loop: Header=BB0_242 Depth=2 +.LBB0_419: # %land.lhs.true407 + # in Loop: Header=BB0_241 Depth=2 ld t6, 8(s11) fcvt.d.w fa4, s8 fle.d a1, fa4, fa5 - bnez a1, .LBB0_422 -.LBB0_421: # %if.end446 - # in Loop: Header=BB0_242 Depth=2 + bnez a1, .LBB0_421 +.LBB0_420: # %if.end446 + # in Loop: Header=BB0_241 Depth=2 ld s8, 392(sp) # 8-byte Folded Reload mv s4, s0 li a0, 8 - bltu a3, a0, .LBB0_680 + bltu a3, a0, .LBB0_679 + j .LBB0_247 +.LBB0_679: # %if.end446 + # in Loop: Header=BB0_241 Depth=2 j .LBB0_248 -.LBB0_680: # %if.end446 - # in Loop: Header=BB0_242 Depth=2 - j .LBB0_249 -.LBB0_422: # %if.then425 - # in Loop: Header=BB0_242 Depth=2 +.LBB0_421: # %if.then425 + # in Loop: Header=BB0_241 Depth=2 ld a6, 512(sp) ld a2, 0(a6) lwu t0, 0(a2) @@ -3620,45 +3610,45 @@ add a4, t6, t0 addi a5, a4, -2 ld s8, 392(sp) # 8-byte Folded Reload - bltu a5, t6, .LBB0_429 -# %bb.423: # %if.then425 - # in Loop: Header=BB0_242 Depth=2 + bltu a5, t6, .LBB0_428 +# %bb.422: # %if.then425 + # in Loop: Header=BB0_241 Depth=2 lwu a7, 4(a2) add a5, a1, a7 addi t1, a5, -2 - bltu t1, a1, .LBB0_429 -# %bb.424: # %land.rhs.i434.preheader - # in Loop: Header=BB0_242 Depth=2 + bltu t1, a1, .LBB0_428 +# %bb.423: # %land.rhs.i434.preheader + # in Loop: Header=BB0_241 Depth=2 li a6, 0 addi a7, a7, -1 addi t0, t0, -1 -.LBB0_425: # %land.rhs.i434 +.LBB0_424: # %land.rhs.i434 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 add t1, a5, a6 add t2, a4, a6 lbu t3, -2(t2) lbu t4, -2(t1) - bne t3, t4, .LBB0_428 -# %bb.426: # %while.body.i439 - # in Loop: Header=BB0_425 Depth=3 + bne t3, t4, .LBB0_427 +# %bb.425: # %while.body.i439 + # in Loop: Header=BB0_424 Depth=3 addi t2, t2, -3 sw t0, 0(a2) sw a7, 4(a2) - bltu t2, t6, .LBB0_428 -# %bb.427: # %while.body.i439 - # in Loop: Header=BB0_425 Depth=3 + bltu t2, t6, .LBB0_427 +# %bb.426: # %while.body.i439 + # in Loop: Header=BB0_424 Depth=3 addi t1, t1, -3 addi a7, a7, -1 addi t0, t0, -1 addi a6, a6, -1 - bgeu t1, a1, .LBB0_425 -.LBB0_428: # %grow_exon_left.exit447.loopexit - # in Loop: Header=BB0_242 Depth=2 + bgeu t1, a1, .LBB0_424 +.LBB0_427: # %grow_exon_left.exit447.loopexit + # in Loop: Header=BB0_241 Depth=2 ld a6, 512(sp) -.LBB0_429: # %grow_exon_left.exit447 - # in Loop: Header=BB0_242 Depth=2 +.LBB0_428: # %grow_exon_left.exit447 + # in Loop: Header=BB0_241 Depth=2 addi a0, a0, -1 slli a0, a0, 32 srli a0, a0, 29 @@ -3667,19 +3657,19 @@ lwu a5, 8(a0) lw a2, 16(s11) sext.w a4, a5 - bgeu a4, a2, .LBB0_282 -# %bb.430: # %land.lhs.true.lr.ph.i453 - # in Loop: Header=BB0_242 Depth=2 + bgeu a4, a2, .LBB0_281 +# %bb.429: # %land.lhs.true.lr.ph.i453 + # in Loop: Header=BB0_241 Depth=2 lwu a7, 12(a0) ld a4, 160(sp) # 8-byte Folded Reload lw a6, 0(a4) sext.w a4, a7 - bltu a6, a4, .LBB0_432 -# %bb.431: # %land.lhs.true.lr.ph.i453 - # in Loop: Header=BB0_242 Depth=2 + bltu a6, a4, .LBB0_431 +# %bb.430: # %land.lhs.true.lr.ph.i453 + # in Loop: Header=BB0_241 Depth=2 mv a4, a6 -.LBB0_432: # %land.lhs.true.lr.ph.i453 - # in Loop: Header=BB0_242 Depth=2 +.LBB0_431: # %land.lhs.true.lr.ph.i453 + # in Loop: Header=BB0_241 Depth=2 slli a4, a4, 32 srli t0, a4, 32 negw a2, a2 @@ -3688,18 +3678,18 @@ addi a6, a7, 1 add a1, a1, a7 sub a7, t0, a7 -.LBB0_433: # %land.lhs.true.i457 +.LBB0_432: # %land.lhs.true.i457 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 - beqz a7, .LBB0_282 -# %bb.434: # %land.rhs.i461 - # in Loop: Header=BB0_433 Depth=3 + beqz a7, .LBB0_281 +# %bb.433: # %land.rhs.i461 + # in Loop: Header=BB0_432 Depth=3 lbu t0, 0(a5) lbu t1, 0(a1) - bne t0, t1, .LBB0_282 -# %bb.435: # %while.body.i465 - # in Loop: Header=BB0_433 Depth=3 + bne t0, t1, .LBB0_281 +# %bb.434: # %while.body.i465 + # in Loop: Header=BB0_432 Depth=3 sw a4, 8(a0) sw a6, 12(a0) addi a4, a4, 1 @@ -3708,12 +3698,11 @@ addi a6, a6, 1 addi a1, a1, 1 addi a7, a7, -1 - bne t0, t5, .LBB0_433 - j .LBB0_282 -.LBB0_436: # %vector.ph1045 - # in Loop: Header=BB0_242 Depth=2 - ld a0, 312(sp) # 8-byte Folded Reload - srli a0, a0, 1 + bne t0, t5, .LBB0_432 + j .LBB0_281 +.LBB0_435: # %vector.ph1045 + # in Loop: Header=BB0_241 Depth=2 + srli a0, a3, 1 neg a0, a0 and a0, s4, a0 mv a1, a0 @@ -3722,41 +3711,41 @@ addi a5, sp, 576 vl2r.v v8, (a5) # Unknown-size Folded Reload ld a5, 224(sp) # 8-byte Folded Reload -.LBB0_437: # %vector.body1050 +.LBB0_436: # %vector.body1050 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_242 Depth=2 + # Parent Loop BB0_241 Depth=2 # => This Inner Loop Header: Depth=3 vs2r.v v8, (a3) vs2r.v v8, (a2) add a3, a3, a4 sub a1, a1, a5 add a2, a2, a4 - bnez a1, .LBB0_437 -# %bb.438: # %middle.block1042 - # in Loop: Header=BB0_242 Depth=2 - beq s4, a0, .LBB0_321 - j .LBB0_319 -.LBB0_439: # %if.end505 + bnez a1, .LBB0_436 +# %bb.437: # %middle.block1042 + # in Loop: Header=BB0_241 Depth=2 + beq s4, a0, .LBB0_320 + j .LBB0_318 +.LBB0_438: # %if.end505 # in Loop: Header=BB0_6 Depth=1 ld a1, 8(s11) ld a2, 16(s10) mv a0, s8 call kill_polyA lw a2, 16(s8) - bltu a2, s6, .LBB0_465 -# %bb.440: # %while.body.i540.preheader + bltu a2, s6, .LBB0_464 +# %bb.439: # %while.body.i540.preheader # in Loop: Header=BB0_6 Depth=1 lw s2, 36(s11) li s3, 1 - j .LBB0_443 -.LBB0_441: # %if.else60.i - # in Loop: Header=BB0_443 Depth=2 + j .LBB0_442 +.LBB0_440: # %if.else60.i + # in Loop: Header=BB0_442 Depth=2 addiw s3, s3, 1 -.LBB0_442: # %if.end62.ithread-pre-split - # in Loop: Header=BB0_443 Depth=2 +.LBB0_441: # %if.end62.ithread-pre-split + # in Loop: Header=BB0_442 Depth=2 lw a2, 16(s8) - bgeu s3, a2, .LBB0_451 -.LBB0_443: # %while.body.i540 + bgeu s3, a2, .LBB0_450 +.LBB0_442: # %while.body.i540 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld a1, 0(s5) @@ -3773,17 +3762,17 @@ lw a3, 4(s1) lw a4, 36(s9) subw s4, a1, a3 - bltu a4, s4, .LBB0_441 -# %bb.444: # %if.then.i545 - # in Loop: Header=BB0_443 Depth=2 + bltu a4, s4, .LBB0_440 +# %bb.443: # %if.then.i545 + # in Loop: Header=BB0_442 Depth=2 addi a2, a2, -1 sw a2, 16(s8) lw a1, 12(s1) lw a2, 12(s0) srli s5, a0, 32 - bgeu a2, a1, .LBB0_447 -# %bb.445: # %if.then11.i - # in Loop: Header=BB0_443 Depth=2 + bgeu a2, a1, .LBB0_446 +# %bb.444: # %if.then11.i + # in Loop: Header=BB0_442 Depth=2 mv a0, s0 call free ld a0, 8(s8) @@ -3796,9 +3785,9 @@ srli a2, a2, 29 call memmove lw a2, 16(s8) - bgeu s3, a2, .LBB0_449 -# %bb.446: # %if.then22.i - # in Loop: Header=BB0_443 Depth=2 + bgeu s3, a2, .LBB0_448 +# %bb.445: # %if.then22.i + # in Loop: Header=BB0_442 Depth=2 ld a1, 440(sp) # 8-byte Folded Reload ld a0, 0(a1) add a0, a0, s5 @@ -3812,9 +3801,9 @@ vle32.v v8, (a0) vsub.vx v8, v8, s4 vse32.v v8, (a0) - j .LBB0_442 -.LBB0_447: # %if.else.i548 - # in Loop: Header=BB0_443 Depth=2 + j .LBB0_441 +.LBB0_446: # %if.else.i548 + # in Loop: Header=BB0_442 Depth=2 mv a0, s1 call free ld a1, 8(s8) @@ -3827,9 +3816,9 @@ slli a2, a2, 32 srli a2, a2, 29 call memmove - bltu s3, s6, .LBB0_450 -# %bb.448: # %if.then45.i - # in Loop: Header=BB0_443 Depth=2 + bltu s3, s6, .LBB0_449 +# %bb.447: # %if.then45.i + # in Loop: Header=BB0_442 Depth=2 ld s5, 440(sp) # 8-byte Folded Reload ld a0, 0(s5) addi a1, s3, -2 @@ -3845,28 +3834,28 @@ vle32.v v8, (s0) vsub.vx v8, v8, s4 vse32.v v8, (s0) - j .LBB0_442 -.LBB0_449: # in Loop: Header=BB0_443 Depth=2 + j .LBB0_441 +.LBB0_448: # in Loop: Header=BB0_442 Depth=2 ld s5, 440(sp) # 8-byte Folded Reload - bltu s3, a2, .LBB0_443 - j .LBB0_451 -.LBB0_450: # in Loop: Header=BB0_443 Depth=2 + bltu s3, a2, .LBB0_442 + j .LBB0_450 +.LBB0_449: # in Loop: Header=BB0_442 Depth=2 ld s5, 440(sp) # 8-byte Folded Reload - j .LBB0_442 -.LBB0_451: # %for.cond.preheader.i + j .LBB0_441 +.LBB0_450: # %for.cond.preheader.i # in Loop: Header=BB0_6 Depth=1 - bltu a2, s6, .LBB0_465 -# %bb.452: # %for.body.lr.ph.i + bltu a2, s6, .LBB0_464 +# %bb.451: # %for.body.lr.ph.i # in Loop: Header=BB0_6 Depth=1 addi s2, s2, 1 li s1, 1 - j .LBB0_455 -.LBB0_453: # in Loop: Header=BB0_455 Depth=2 + j .LBB0_454 +.LBB0_452: # in Loop: Header=BB0_454 Depth=2 addiw s1, s1, 1 - bltu s1, a2, .LBB0_455 - j .LBB0_465 -.LBB0_454: # %if.then91.i555 - # in Loop: Header=BB0_455 Depth=2 + bltu s1, a2, .LBB0_454 + j .LBB0_464 +.LBB0_453: # %if.then91.i555 + # in Loop: Header=BB0_454 Depth=2 addi a2, a0, 8 vsetivli zero, 2, e32, mf2, ta, ma vle32.v v8, (a2) @@ -3887,8 +3876,8 @@ call memmove lw a2, 16(s8) addiw s1, s0, 1 - bgeu s1, a2, .LBB0_465 -.LBB0_455: # %for.body.i550 + bgeu s1, a2, .LBB0_464 +.LBB0_454: # %for.body.i550 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld a0, 0(s5) @@ -3904,57 +3893,57 @@ lw a5, 8(a1) lw a4, 0(a0) addiw a6, a5, 31 - bgeu a4, a6, .LBB0_458 -# %bb.456: # %land.lhs.true.i556 - # in Loop: Header=BB0_455 Depth=2 + bgeu a4, a6, .LBB0_457 +# %bb.455: # %land.lhs.true.i556 + # in Loop: Header=BB0_454 Depth=2 lw a6, 12(a1) lw a7, 4(a0) addw t0, s2, a6 - bgeu t0, a7, .LBB0_454 -# %bb.457: # %lor.lhs.false.i - # in Loop: Header=BB0_455 Depth=2 - bltu a5, a4, .LBB0_459 - j .LBB0_453 -.LBB0_458: # %for.body.lor.lhs.false_crit_edge.i - # in Loop: Header=BB0_455 Depth=2 + bgeu t0, a7, .LBB0_453 +# %bb.456: # %lor.lhs.false.i + # in Loop: Header=BB0_454 Depth=2 + bltu a5, a4, .LBB0_458 + j .LBB0_452 +.LBB0_457: # %for.body.lor.lhs.false_crit_edge.i + # in Loop: Header=BB0_454 Depth=2 lw a6, 12(a1) lw a7, 4(a0) - bgeu a5, a4, .LBB0_453 -.LBB0_459: # %lor.lhs.false.i - # in Loop: Header=BB0_455 Depth=2 - bgeu a6, a7, .LBB0_453 -# %bb.460: # %about_same_gap_p.exit.i - # in Loop: Header=BB0_455 Depth=2 + bgeu a5, a4, .LBB0_452 +.LBB0_458: # %lor.lhs.false.i + # in Loop: Header=BB0_454 Depth=2 + bgeu a6, a7, .LBB0_452 +# %bb.459: # %about_same_gap_p.exit.i + # in Loop: Header=BB0_454 Depth=2 not a5, a5 addw t0, a4, a5 not a4, a6 addw a4, a7, a4 mv a5, a4 - bltu a4, t0, .LBB0_462 -# %bb.461: # %about_same_gap_p.exit.i - # in Loop: Header=BB0_455 Depth=2 + bltu a4, t0, .LBB0_461 +# %bb.460: # %about_same_gap_p.exit.i + # in Loop: Header=BB0_454 Depth=2 mv a5, t0 -.LBB0_462: # %about_same_gap_p.exit.i - # in Loop: Header=BB0_455 Depth=2 - bltu t0, a4, .LBB0_464 -# %bb.463: # %about_same_gap_p.exit.i - # in Loop: Header=BB0_455 Depth=2 +.LBB0_461: # %about_same_gap_p.exit.i + # in Loop: Header=BB0_454 Depth=2 + bltu t0, a4, .LBB0_463 +# %bb.462: # %about_same_gap_p.exit.i + # in Loop: Header=BB0_454 Depth=2 mv a4, t0 -.LBB0_464: # %about_same_gap_p.exit.i - # in Loop: Header=BB0_455 Depth=2 +.LBB0_463: # %about_same_gap_p.exit.i + # in Loop: Header=BB0_454 Depth=2 lw a6, 32(s9) subw a5, a4, a5 li a7, 100 mul a5, a5, a7 divuw a4, a5, a4 - bgeu a6, a4, .LBB0_454 - j .LBB0_453 -.LBB0_465: # %compact_exons.exit + bgeu a6, a4, .LBB0_453 + j .LBB0_452 +.LBB0_464: # %compact_exons.exit # in Loop: Header=BB0_6 Depth=1 addi a0, s8, 32 sd a0, 456(sp) # 8-byte Folded Spill - beqz a2, .LBB0_515 -# %bb.466: # %while.body518.preheader + beqz a2, .LBB0_514 +# %bb.465: # %while.body518.preheader # in Loop: Header=BB0_6 Depth=1 ld a0, 0(s5) ld a0, 0(a0) @@ -3964,19 +3953,19 @@ subw a1, a1, a3 li s0, 0 addiw a1, a1, 1 - bgeu a1, a4, .LBB0_470 -# %bb.467: # %cleanup533.preheader + bgeu a1, a4, .LBB0_469 +# %bb.466: # %cleanup533.preheader # in Loop: Header=BB0_6 Depth=1 li s1, 8 -.LBB0_468: # %cleanup533 +.LBB0_467: # %cleanup533 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 call free lw a2, 16(s8) addiw s0, s0, 1 - bgeu s0, a2, .LBB0_471 -# %bb.469: # %while.body518 - # in Loop: Header=BB0_468 Depth=2 + bgeu s0, a2, .LBB0_470 +# %bb.468: # %while.body518 + # in Loop: Header=BB0_467 Depth=2 ld a0, 0(s5) add a0, a0, s1 ld a0, 0(a0) @@ -3986,11 +3975,11 @@ subw a1, a1, a3 addiw a1, a1, 1 addi s1, s1, 8 - bltu a1, a4, .LBB0_468 -.LBB0_470: # %while.end536 + bltu a1, a4, .LBB0_467 +.LBB0_469: # %while.end536 # in Loop: Header=BB0_6 Depth=1 - beqz s0, .LBB0_472 -.LBB0_471: # %if.then539 + beqz s0, .LBB0_471 +.LBB0_470: # %if.then539 # in Loop: Header=BB0_6 Depth=1 ld a0, 8(s8) slli a1, s0, 32 @@ -4003,11 +3992,11 @@ lw a2, 16(s8) subw a2, a2, s0 sw a2, 16(s8) -.LBB0_472: # %if.end551 +.LBB0_471: # %if.end551 # in Loop: Header=BB0_6 Depth=1 addiw a0, a2, -1 - bltz a0, .LBB0_477 -# %bb.473: # %while.body562.preheader + bltz a0, .LBB0_476 +# %bb.472: # %while.body562.preheader # in Loop: Header=BB0_6 Depth=1 ld a1, 0(s5) slli a0, a0, 3 @@ -4018,11 +4007,11 @@ lw a4, 36(s11) subw a1, a1, a3 addiw a1, a1, 1 - bgeu a1, a4, .LBB0_477 -# %bb.474: # %cleanup579.preheader + bgeu a1, a4, .LBB0_476 +# %bb.473: # %cleanup579.preheader # in Loop: Header=BB0_6 Depth=1 addiw s0, a2, -2 -.LBB0_475: # %cleanup579 +.LBB0_474: # %cleanup579 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 addiw s1, s0, 1 @@ -4030,9 +4019,9 @@ lw a2, 16(s8) addiw a2, a2, -1 sw a2, 16(s8) - blez s1, .LBB0_477 -# %bb.476: # %while.body562 - # in Loop: Header=BB0_475 Depth=2 + blez s1, .LBB0_476 +# %bb.475: # %while.body562 + # in Loop: Header=BB0_474 Depth=2 ld a0, 0(s5) slli a1, s0, 3 add a0, a0, a1 @@ -4043,16 +4032,16 @@ subw a1, a1, a3 addiw a1, a1, 1 addiw s0, s0, -1 - bltu a1, a4, .LBB0_475 -.LBB0_477: # %if.end583 + bltu a1, a4, .LBB0_474 +.LBB0_476: # %if.end583 # in Loop: Header=BB0_6 Depth=1 ld a0, 8(s11) sd a0, 464(sp) # 8-byte Folded Spill ld a0, 16(s10) sd a0, 448(sp) # 8-byte Folded Spill li t1, 1 - bltu a2, s6, .LBB0_565 -# %bb.478: # %for.body.lr.ph.i571 + bltu a2, s6, .LBB0_564 +# %bb.477: # %for.body.lr.ph.i571 # in Loop: Header=BB0_6 Depth=1 ld s1, 464(sp) # 8-byte Folded Reload addi a0, s1, -1 @@ -4065,21 +4054,21 @@ sd a2, 416(sp) # 8-byte Folded Spill li a4, 1 lui t5, 1044480 - j .LBB0_481 -.LBB0_479: # %perfect_spl_p.exit.thread - # in Loop: Header=BB0_481 Depth=2 + j .LBB0_480 +.LBB0_478: # %perfect_spl_p.exit.thread + # in Loop: Header=BB0_480 Depth=2 li t1, 1 -.LBB0_480: # %cleanup.i - # in Loop: Header=BB0_481 Depth=2 +.LBB0_479: # %cleanup.i + # in Loop: Header=BB0_480 Depth=2 lwu a2, 16(s8) addi a4, a4, 1 - bgeu a4, a2, .LBB0_517 -.LBB0_481: # %for.body.i572 + bgeu a4, a2, .LBB0_516 +.LBB0_480: # %for.body.i572 # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_485 Depth 3 - # Child Loop BB0_504 Depth 3 - # Child Loop BB0_498 Depth 3 + # Child Loop BB0_484 Depth 3 + # Child Loop BB0_503 Depth 3 + # Child Loop BB0_497 Depth 3 ld a2, 0(s5) slli a5, a4, 3 add a5, a2, a5 @@ -4089,9 +4078,9 @@ lw a5, 4(a6) lwu t0, 12(a2) subw a5, a5, t0 - bne a5, t1, .LBB0_480 -# %bb.482: # %if.end.i590 - # in Loop: Header=BB0_481 Depth=2 + bne a5, t1, .LBB0_479 +# %bb.481: # %if.end.i590 + # in Loop: Header=BB0_480 Depth=2 lwu a5, 8(a2) lwu t1, 44(s9) add a5, s1, a5 @@ -4105,81 +4094,81 @@ xor a7, t4, s1 seqz t3, a7 sext.w a7, t1 - bltu t1, s6, .LBB0_493 -# %bb.483: # %for.body.i.i670.preheader - # in Loop: Header=BB0_481 Depth=2 + bltu t1, s6, .LBB0_492 +# %bb.482: # %for.body.i.i670.preheader + # in Loop: Header=BB0_480 Depth=2 li t5, 0 li s0, 0 li t6, 1 sub t6, t6, t1 - j .LBB0_485 -.LBB0_484: # %for.body.i.i670 - # in Loop: Header=BB0_485 Depth=3 + j .LBB0_484 +.LBB0_483: # %for.body.i.i670 + # in Loop: Header=BB0_484 Depth=3 addi t6, t6, 1 mv s1, s2 - beqz t6, .LBB0_501 -.LBB0_485: # %for.body.i.i670 + beqz t6, .LBB0_500 +.LBB0_484: # %for.body.i.i670 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_481 Depth=2 + # Parent Loop BB0_480 Depth=2 # => This Inner Loop Header: Depth=3 add s2, t2, t6 lbu s2, 0(s2) xor t4, t4, s2 seqz t4, t4 addw t5, t5, t4 - blt t3, t5, .LBB0_487 -# %bb.486: # %for.body.i.i670 - # in Loop: Header=BB0_485 Depth=3 + blt t3, t5, .LBB0_486 +# %bb.485: # %for.body.i.i670 + # in Loop: Header=BB0_484 Depth=3 mv t5, t3 -.LBB0_487: # %for.body.i.i670 - # in Loop: Header=BB0_485 Depth=3 +.LBB0_486: # %for.body.i.i670 + # in Loop: Header=BB0_484 Depth=3 add t4, a5, t6 lbu t4, 0(t4) xor s1, t4, s1 seqz s1, s1 addw s0, s0, s1 - blt t3, s0, .LBB0_489 -# %bb.488: # %for.body.i.i670 - # in Loop: Header=BB0_485 Depth=3 + blt t3, s0, .LBB0_488 +# %bb.487: # %for.body.i.i670 + # in Loop: Header=BB0_484 Depth=3 mv s0, t3 -.LBB0_489: # %for.body.i.i670 - # in Loop: Header=BB0_485 Depth=3 +.LBB0_488: # %for.body.i.i670 + # in Loop: Header=BB0_484 Depth=3 addiw s1, t5, -1 addiw s3, s0, -1 - blt s3, s1, .LBB0_491 -# %bb.490: # %for.body.i.i670 - # in Loop: Header=BB0_485 Depth=3 + blt s3, s1, .LBB0_490 +# %bb.489: # %for.body.i.i670 + # in Loop: Header=BB0_484 Depth=3 mv s1, s3 -.LBB0_491: # %for.body.i.i670 - # in Loop: Header=BB0_485 Depth=3 +.LBB0_490: # %for.body.i.i670 + # in Loop: Header=BB0_484 Depth=3 xor s3, t4, s2 seqz s3, s3 addw s3, t3, s3 mv t3, s1 - blt s3, s1, .LBB0_484 -# %bb.492: # %for.body.i.i670 - # in Loop: Header=BB0_485 Depth=3 + blt s3, s1, .LBB0_483 +# %bb.491: # %for.body.i.i670 + # in Loop: Header=BB0_484 Depth=3 mv t3, s3 - j .LBB0_484 -.LBB0_493: # %SWscore.exit.i.thread - # in Loop: Header=BB0_481 Depth=2 + j .LBB0_483 +.LBB0_492: # %SWscore.exit.i.thread + # in Loop: Header=BB0_480 Depth=2 ld s1, 464(sp) # 8-byte Folded Reload - bltu t3, t1, .LBB0_479 -# %bb.494: # %if.end.i655.thread - # in Loop: Header=BB0_481 Depth=2 + bltu t3, t1, .LBB0_478 +# %bb.493: # %if.end.i655.thread + # in Loop: Header=BB0_480 Depth=2 ld t0, 360(sp) # 8-byte Folded Reload add t0, t0, a6 lbu t0, 0(t0) lbu t1, 0(t2) xor t0, t0, t1 seqz s2, t0 -.LBB0_495: # %SWscore.exit51.i - # in Loop: Header=BB0_481 Depth=2 +.LBB0_494: # %SWscore.exit51.i + # in Loop: Header=BB0_480 Depth=2 lui t5, 1044480 ld s1, 464(sp) # 8-byte Folded Reload - bltu s2, a7, .LBB0_479 -# %bb.496: # %if.end17.i659 - # in Loop: Header=BB0_481 Depth=2 + bltu s2, a7, .LBB0_478 +# %bb.495: # %if.end17.i659 + # in Loop: Header=BB0_480 Depth=2 lbu a7, 1(a5) lbu a5, 0(a5) slli a7, a7, 8 @@ -4191,15 +4180,15 @@ slli a5, a5, 8 or a5, a5, a6 sh a5, 474(sp) - beqz a0, .LBB0_479 -# %bb.497: # %for.body.i662.preheader - # in Loop: Header=BB0_481 Depth=2 + beqz a0, .LBB0_478 +# %bb.496: # %for.body.i662.preheader + # in Loop: Header=BB0_480 Depth=2 li a5, 0 mv a6, a1 mv a7, a0 -.LBB0_498: # %for.body.i662 +.LBB0_497: # %for.body.i662 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_481 Depth=2 + # Parent Loop BB0_480 Depth=2 # => This Inner Loop Header: Depth=3 lbu t0, 1(a6) lbu t1, 0(a6) @@ -4215,9 +4204,9 @@ or t0, t2, t0 slli t1, t1, 16 or t1, t1, t4 - beq t1, t0, .LBB0_512 -# %bb.499: # %if.end33.i - # in Loop: Header=BB0_498 Depth=3 + beq t1, t0, .LBB0_511 +# %bb.498: # %if.end33.i + # in Loop: Header=BB0_497 Depth=3 lbu t0, 5(a6) lbu t1, 4(a6) lbu t2, 6(a6) @@ -4232,21 +4221,21 @@ or t0, t2, t0 slli t1, t1, 16 or t1, t1, t4 - beq t1, t0, .LBB0_513 -# %bb.500: # %for.inc.i665 - # in Loop: Header=BB0_498 Depth=3 + beq t1, t0, .LBB0_512 +# %bb.499: # %for.inc.i665 + # in Loop: Header=BB0_497 Depth=3 add a5, a5, t5 addi a7, a7, -1 addi a6, a6, 8 - bnez a7, .LBB0_498 - j .LBB0_479 -.LBB0_501: # %SWscore.exit.i - # in Loop: Header=BB0_481 Depth=2 + bnez a7, .LBB0_497 + j .LBB0_478 +.LBB0_500: # %SWscore.exit.i + # in Loop: Header=BB0_480 Depth=2 lui t5, 1044480 ld s1, 464(sp) # 8-byte Folded Reload - bltu t3, a7, .LBB0_479 -# %bb.502: # %if.end.i655 - # in Loop: Header=BB0_481 Depth=2 + bltu t3, a7, .LBB0_478 +# %bb.501: # %if.end.i655 + # in Loop: Header=BB0_480 Depth=2 ld s2, 360(sp) # 8-byte Folded Reload add t3, s2, a6 lbu t3, 0(t3) @@ -4260,69 +4249,69 @@ add t0, s1, t0 add t1, t1, a6 add t1, s2, t1 - j .LBB0_504 -.LBB0_503: # %for.body.i27.i - # in Loop: Header=BB0_504 Depth=3 + j .LBB0_503 +.LBB0_502: # %for.body.i27.i + # in Loop: Header=BB0_503 Depth=3 addi t5, t5, 1 addi t0, t0, 1 mv t6, s1 mv s0, s2 - beq t5, t1, .LBB0_495 -.LBB0_504: # %for.body.i27.i + beq t5, t1, .LBB0_494 +.LBB0_503: # %for.body.i27.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_481 Depth=2 + # Parent Loop BB0_480 Depth=2 # => This Inner Loop Header: Depth=3 lbu s1, 0(t0) xor t3, t3, s1 seqz t3, t3 addw t2, t2, t3 - blt s0, t2, .LBB0_506 -# %bb.505: # %for.body.i27.i - # in Loop: Header=BB0_504 Depth=3 + blt s0, t2, .LBB0_505 +# %bb.504: # %for.body.i27.i + # in Loop: Header=BB0_503 Depth=3 mv t2, s0 -.LBB0_506: # %for.body.i27.i - # in Loop: Header=BB0_504 Depth=3 +.LBB0_505: # %for.body.i27.i + # in Loop: Header=BB0_503 Depth=3 lbu t3, 0(t5) xor t6, t3, t6 seqz t6, t6 addw t4, t4, t6 - blt s0, t4, .LBB0_509 -# %bb.507: # %for.body.i27.i - # in Loop: Header=BB0_504 Depth=3 + blt s0, t4, .LBB0_508 +# %bb.506: # %for.body.i27.i + # in Loop: Header=BB0_503 Depth=3 mv t4, s0 addiw s2, t2, -1 addiw t6, s0, -1 - bge t6, s2, .LBB0_510 -.LBB0_508: # %for.body.i27.i - # in Loop: Header=BB0_504 Depth=3 + bge t6, s2, .LBB0_509 +.LBB0_507: # %for.body.i27.i + # in Loop: Header=BB0_503 Depth=3 xor t6, t3, s1 seqz t6, t6 addw t6, s0, t6 - blt t6, s2, .LBB0_503 - j .LBB0_511 -.LBB0_509: # %for.body.i27.i - # in Loop: Header=BB0_504 Depth=3 + blt t6, s2, .LBB0_502 + j .LBB0_510 +.LBB0_508: # %for.body.i27.i + # in Loop: Header=BB0_503 Depth=3 addiw s2, t2, -1 addiw t6, t4, -1 - blt t6, s2, .LBB0_508 -.LBB0_510: # %for.body.i27.i - # in Loop: Header=BB0_504 Depth=3 + blt t6, s2, .LBB0_507 +.LBB0_509: # %for.body.i27.i + # in Loop: Header=BB0_503 Depth=3 mv s2, t6 xor t6, t3, s1 seqz t6, t6 addw t6, s0, t6 - blt t6, s2, .LBB0_503 -.LBB0_511: # %for.body.i27.i - # in Loop: Header=BB0_504 Depth=3 + blt t6, s2, .LBB0_502 +.LBB0_510: # %for.body.i27.i + # in Loop: Header=BB0_503 Depth=3 mv s2, t6 - j .LBB0_503 -.LBB0_512: # in Loop: Header=BB0_481 Depth=2 + j .LBB0_502 +.LBB0_511: # in Loop: Header=BB0_480 Depth=2 li a6, 1 - j .LBB0_514 -.LBB0_513: # in Loop: Header=BB0_481 Depth=2 + j .LBB0_513 +.LBB0_512: # in Loop: Header=BB0_480 Depth=2 li a6, -1 -.LBB0_514: # %if.then17.i - # in Loop: Header=BB0_481 Depth=2 +.LBB0_513: # %if.then17.i + # in Loop: Header=BB0_480 Depth=2 li t1, 1 lw a7, 32(s8) add a7, a7, a6 @@ -4342,44 +4331,44 @@ or a6, a7, a6 subw a5, a6, a5 sw a5, 28(a2) - j .LBB0_480 -.LBB0_515: # %for.end183.i563.thread + j .LBB0_479 +.LBB0_514: # %for.end183.i563.thread # in Loop: Header=BB0_6 Depth=1 lw a0, 32(s8) - bnez a0, .LBB0_614 -# %bb.516: # in Loop: Header=BB0_6 Depth=1 + bnez a0, .LBB0_613 +# %bb.515: # in Loop: Header=BB0_6 Depth=1 ld s7, 8(s11) - ld s1, 16(s10) - mv a5, a2 + ld a5, 16(s10) mv a6, a2 - j .LBB0_596 -.LBB0_517: # %for.cond43.preheader.i + mv a7, a2 + j .LBB0_595 +.LBB0_516: # %for.cond43.preheader.i # in Loop: Header=BB0_6 Depth=1 - bltu a2, s6, .LBB0_565 -# %bb.518: # %for.body47.i.preheader + bltu a2, s6, .LBB0_564 +# %bb.517: # %for.body47.i.preheader # in Loop: Header=BB0_6 Depth=1 li a3, 1 - j .LBB0_521 -.LBB0_519: # %if.end172.i - # in Loop: Header=BB0_521 Depth=2 + j .LBB0_520 +.LBB0_518: # %if.end172.i + # in Loop: Header=BB0_520 Depth=2 call free lw a2, 16(s8) ld a3, 368(sp) # 8-byte Folded Reload -.LBB0_520: # %cleanup173.i - # in Loop: Header=BB0_521 Depth=2 +.LBB0_519: # %cleanup173.i + # in Loop: Header=BB0_520 Depth=2 addi a3, a3, 1 slli a0, a2, 32 srli a0, a0, 32 ld s5, 440(sp) # 8-byte Folded Reload - bgeu a3, a0, .LBB0_565 -.LBB0_521: # %for.body47.i + bgeu a3, a0, .LBB0_564 +.LBB0_520: # %for.body47.i # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_526 Depth 3 - # Child Loop BB0_529 Depth 4 - # Child Loop BB0_542 Depth 4 - # Child Loop BB0_554 Depth 4 - # Child Loop BB0_563 Depth 3 + # Child Loop BB0_525 Depth 3 + # Child Loop BB0_528 Depth 4 + # Child Loop BB0_541 Depth 4 + # Child Loop BB0_553 Depth 4 + # Child Loop BB0_562 Depth 3 ld a0, 0(s5) slli a1, a3, 3 add a0, a0, a1 @@ -4387,9 +4376,9 @@ ld s5, -8(a0) lw s1, 4(a1) lw s0, 12(s5) - bltu s0, s1, .LBB0_520 -# %bb.522: # %if.end65.i - # in Loop: Header=BB0_521 Depth=2 + bltu s0, s1, .LBB0_519 +# %bb.521: # %if.end65.i + # in Loop: Header=BB0_520 Depth=2 sd a3, 368(sp) # 8-byte Folded Spill subw s2, s0, s1 addiw s3, s2, 2 @@ -4400,9 +4389,9 @@ call xmalloc ld a5, 376(sp) # 8-byte Folded Reload li a2, 1 - beqz s3, .LBB0_519 -# %bb.523: # %for.body75.lr.ph.i - # in Loop: Header=BB0_521 Depth=2 + beqz s3, .LBB0_518 +# %bb.522: # %for.body75.lr.ph.i + # in Loop: Header=BB0_520 Depth=2 li a3, 0 sd zero, 400(sp) # 8-byte Folded Spill srli a1, s4, 32 @@ -4429,26 +4418,26 @@ mv s9, t0 ld t4, 464(sp) # 8-byte Folded Reload ld s1, 448(sp) # 8-byte Folded Reload - j .LBB0_526 -.LBB0_524: # %if.else.i586 - # in Loop: Header=BB0_526 Depth=3 + j .LBB0_525 +.LBB0_523: # %if.else.i586 + # in Loop: Header=BB0_525 Depth=3 sw zero, 24(s10) -.LBB0_525: # %for.inc103.i - # in Loop: Header=BB0_526 Depth=3 +.LBB0_524: # %for.inc103.i + # in Loop: Header=BB0_525 Depth=3 ld t4, 464(sp) # 8-byte Folded Reload ld s1, 448(sp) # 8-byte Folded Reload addi a3, a3, 1 addi t5, t5, 1 addi t6, t6, 1 addi s9, s9, 1 - beq a3, a1, .LBB0_560 -.LBB0_526: # %for.body75.i + beq a3, a1, .LBB0_559 +.LBB0_525: # %for.body75.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_521 Depth=2 + # Parent Loop BB0_520 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_529 Depth 4 - # Child Loop BB0_542 Depth 4 - # Child Loop BB0_554 Depth 4 + # Child Loop BB0_528 Depth 4 + # Child Loop BB0_541 Depth 4 + # Child Loop BB0_553 Depth 4 add a2, a3, a6 add a5, a2, a4 li t2, 28 @@ -4473,9 +4462,9 @@ srli ra, a2, 32 xor a2, a5, s2 seqz t3, a2 - bltu s8, s6, .LBB0_537 -# %bb.527: # %for.body.i.i.i.preheader - # in Loop: Header=BB0_526 Depth=3 + bltu s8, s6, .LBB0_536 +# %bb.526: # %for.body.i.i.i.preheader + # in Loop: Header=BB0_525 Depth=3 li s3, 0 li a2, 0 slli t2, t5, 32 @@ -4483,73 +4472,73 @@ add s0, t4, s0 add s6, s1, ra ld t4, 432(sp) # 8-byte Folded Reload - j .LBB0_529 -.LBB0_528: # %for.body.i.i.i - # in Loop: Header=BB0_529 Depth=4 + j .LBB0_528 +.LBB0_527: # %for.body.i.i.i + # in Loop: Header=BB0_528 Depth=4 addi t4, t4, 1 mv s2, t2 mv t3, s1 - beqz t4, .LBB0_538 -.LBB0_529: # %for.body.i.i.i + beqz t4, .LBB0_537 +.LBB0_528: # %for.body.i.i.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_521 Depth=2 - # Parent Loop BB0_526 Depth=3 + # Parent Loop BB0_520 Depth=2 + # Parent Loop BB0_525 Depth=3 # => This Inner Loop Header: Depth=4 add t2, s6, t4 lbu t2, 0(t2) xor a5, a5, t2 seqz a5, a5 addw s3, s3, a5 - blt t3, s3, .LBB0_531 -# %bb.530: # %for.body.i.i.i - # in Loop: Header=BB0_529 Depth=4 + blt t3, s3, .LBB0_530 +# %bb.529: # %for.body.i.i.i + # in Loop: Header=BB0_528 Depth=4 mv s3, t3 -.LBB0_531: # %for.body.i.i.i - # in Loop: Header=BB0_529 Depth=4 +.LBB0_530: # %for.body.i.i.i + # in Loop: Header=BB0_528 Depth=4 add a5, s0, t4 lbu a5, 0(a5) xor s1, a5, s2 seqz s1, s1 addw a2, a2, s1 - blt t3, a2, .LBB0_534 -# %bb.532: # %for.body.i.i.i - # in Loop: Header=BB0_529 Depth=4 + blt t3, a2, .LBB0_533 +# %bb.531: # %for.body.i.i.i + # in Loop: Header=BB0_528 Depth=4 mv a2, t3 addiw s1, s3, -1 addiw s2, t3, -1 - bge s2, s1, .LBB0_535 -.LBB0_533: # %for.body.i.i.i - # in Loop: Header=BB0_529 Depth=4 + bge s2, s1, .LBB0_534 +.LBB0_532: # %for.body.i.i.i + # in Loop: Header=BB0_528 Depth=4 xor s2, a5, t2 seqz s2, s2 addw t3, t3, s2 - blt t3, s1, .LBB0_528 - j .LBB0_536 -.LBB0_534: # %for.body.i.i.i - # in Loop: Header=BB0_529 Depth=4 + blt t3, s1, .LBB0_527 + j .LBB0_535 +.LBB0_533: # %for.body.i.i.i + # in Loop: Header=BB0_528 Depth=4 addiw s1, s3, -1 addiw s2, a2, -1 - blt s2, s1, .LBB0_533 -.LBB0_535: # %for.body.i.i.i - # in Loop: Header=BB0_529 Depth=4 + blt s2, s1, .LBB0_532 +.LBB0_534: # %for.body.i.i.i + # in Loop: Header=BB0_528 Depth=4 mv s1, s2 xor s2, a5, t2 seqz s2, s2 addw t3, t3, s2 - blt t3, s1, .LBB0_528 -.LBB0_536: # %for.body.i.i.i - # in Loop: Header=BB0_529 Depth=4 + blt t3, s1, .LBB0_527 +.LBB0_535: # %for.body.i.i.i + # in Loop: Header=BB0_528 Depth=4 mv s1, t3 - j .LBB0_528 -.LBB0_537: # in Loop: Header=BB0_526 Depth=3 + j .LBB0_527 +.LBB0_536: # in Loop: Header=BB0_525 Depth=3 mv s1, t3 -.LBB0_538: # %SWscore.exit.i.i - # in Loop: Header=BB0_526 Depth=3 +.LBB0_537: # %SWscore.exit.i.i + # in Loop: Header=BB0_525 Depth=3 sext.w a5, s8 li s6, 2 - bltu s1, a5, .LBB0_524 -# %bb.539: # %if.end.i.i - # in Loop: Header=BB0_526 Depth=3 + bltu s1, a5, .LBB0_523 +# %bb.538: # %if.end.i.i + # in Loop: Header=BB0_525 Depth=3 slli s4, s4, 32 srli s4, s4, 32 ld s2, 464(sp) # 8-byte Folded Reload @@ -4558,9 +4547,9 @@ lbu t2, 0(s7) xor a2, s1, t2 seqz s7, a2 - bltu s8, s6, .LBB0_550 -# %bb.540: # %for.body.i27.i.i.preheader - # in Loop: Header=BB0_526 Depth=3 + bltu s8, s6, .LBB0_549 +# %bb.539: # %for.body.i27.i.i.preheader + # in Loop: Header=BB0_525 Depth=3 li t3, 0 li a2, 0 slli t4, s9, 32 @@ -4570,71 +4559,71 @@ add s6, s2, s6 ld t4, 416(sp) # 8-byte Folded Reload add ra, t4, ra - j .LBB0_542 -.LBB0_541: # %for.body.i27.i.i - # in Loop: Header=BB0_542 Depth=4 + j .LBB0_541 +.LBB0_540: # %for.body.i27.i.i + # in Loop: Header=BB0_541 Depth=4 addi s6, s6, 1 addi ra, ra, 1 mv t2, t4 mv s7, s2 - beq s6, s0, .LBB0_551 -.LBB0_542: # %for.body.i27.i.i + beq s6, s0, .LBB0_550 +.LBB0_541: # %for.body.i27.i.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_521 Depth=2 - # Parent Loop BB0_526 Depth=3 + # Parent Loop BB0_520 Depth=2 + # Parent Loop BB0_525 Depth=3 # => This Inner Loop Header: Depth=4 lbu t4, 0(ra) xor s1, s1, t4 seqz s1, s1 addw t3, t3, s1 - blt s7, t3, .LBB0_544 -# %bb.543: # %for.body.i27.i.i - # in Loop: Header=BB0_542 Depth=4 + blt s7, t3, .LBB0_543 +# %bb.542: # %for.body.i27.i.i + # in Loop: Header=BB0_541 Depth=4 mv t3, s7 -.LBB0_544: # %for.body.i27.i.i - # in Loop: Header=BB0_542 Depth=4 +.LBB0_543: # %for.body.i27.i.i + # in Loop: Header=BB0_541 Depth=4 lbu s1, 0(s6) xor t2, s1, t2 seqz t2, t2 addw a2, a2, t2 - blt s7, a2, .LBB0_547 -# %bb.545: # %for.body.i27.i.i - # in Loop: Header=BB0_542 Depth=4 + blt s7, a2, .LBB0_546 +# %bb.544: # %for.body.i27.i.i + # in Loop: Header=BB0_541 Depth=4 mv a2, s7 addiw s2, t3, -1 addiw t2, s7, -1 - bge t2, s2, .LBB0_548 -.LBB0_546: # %for.body.i27.i.i - # in Loop: Header=BB0_542 Depth=4 + bge t2, s2, .LBB0_547 +.LBB0_545: # %for.body.i27.i.i + # in Loop: Header=BB0_541 Depth=4 xor t2, s1, t4 seqz t2, t2 addw t2, s7, t2 - blt t2, s2, .LBB0_541 - j .LBB0_549 -.LBB0_547: # %for.body.i27.i.i - # in Loop: Header=BB0_542 Depth=4 + blt t2, s2, .LBB0_540 + j .LBB0_548 +.LBB0_546: # %for.body.i27.i.i + # in Loop: Header=BB0_541 Depth=4 addiw s2, t3, -1 addiw t2, a2, -1 - blt t2, s2, .LBB0_546 -.LBB0_548: # %for.body.i27.i.i - # in Loop: Header=BB0_542 Depth=4 + blt t2, s2, .LBB0_545 +.LBB0_547: # %for.body.i27.i.i + # in Loop: Header=BB0_541 Depth=4 mv s2, t2 xor t2, s1, t4 seqz t2, t2 addw t2, s7, t2 - blt t2, s2, .LBB0_541 -.LBB0_549: # %for.body.i27.i.i - # in Loop: Header=BB0_542 Depth=4 + blt t2, s2, .LBB0_540 +.LBB0_548: # %for.body.i27.i.i + # in Loop: Header=BB0_541 Depth=4 mv s2, t2 - j .LBB0_541 -.LBB0_550: # in Loop: Header=BB0_526 Depth=3 + j .LBB0_540 +.LBB0_549: # in Loop: Header=BB0_525 Depth=3 mv s2, s7 -.LBB0_551: # %SWscore.exit51.i.i - # in Loop: Header=BB0_526 Depth=3 +.LBB0_550: # %SWscore.exit51.i.i + # in Loop: Header=BB0_525 Depth=3 li s6, 2 - bltu s2, a5, .LBB0_524 -# %bb.552: # %if.end17.i.i - # in Loop: Header=BB0_526 Depth=3 + bltu s2, a5, .LBB0_523 +# %bb.551: # %if.end17.i.i + # in Loop: Header=BB0_525 Depth=3 lbu a2, 1(s11) lbu a5, 0(s11) slli a2, a2, 8 @@ -4646,16 +4635,16 @@ or a2, a2, a5 sh a2, 474(sp) ld a2, 424(sp) # 8-byte Folded Reload - beqz a2, .LBB0_524 -# %bb.553: # %for.body.i.i.preheader - # in Loop: Header=BB0_526 Depth=3 + beqz a2, .LBB0_523 +# %bb.552: # %for.body.i.i.preheader + # in Loop: Header=BB0_525 Depth=3 li a5, 0 ld a2, 384(sp) # 8-byte Folded Reload ld t3, 424(sp) # 8-byte Folded Reload -.LBB0_554: # %for.body.i.i +.LBB0_553: # %for.body.i.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_521 Depth=2 - # Parent Loop BB0_526 Depth=3 + # Parent Loop BB0_520 Depth=2 + # Parent Loop BB0_525 Depth=3 # => This Inner Loop Header: Depth=4 lbu t2, 1(a2) lbu t4, 0(a2) @@ -4671,9 +4660,9 @@ or t2, s0, t2 slli t4, t4, 16 or t4, t4, s2 - beq t4, t2, .LBB0_557 -# %bb.555: # %if.end33.i.i - # in Loop: Header=BB0_554 Depth=4 + beq t4, t2, .LBB0_556 +# %bb.554: # %if.end33.i.i + # in Loop: Header=BB0_553 Depth=4 lbu t2, 5(a2) lbu t4, 4(a2) lbu s0, 6(a2) @@ -4688,57 +4677,57 @@ or t2, s0, t2 slli t4, t4, 16 or t4, t4, s2 - beq t4, t2, .LBB0_558 -# %bb.556: # %for.inc.i.i - # in Loop: Header=BB0_554 Depth=4 + beq t4, t2, .LBB0_557 +# %bb.555: # %for.inc.i.i + # in Loop: Header=BB0_553 Depth=4 addi a5, a5, 1 addi t3, t3, -1 addi a2, a2, 8 - bnez t3, .LBB0_554 - j .LBB0_524 -.LBB0_557: # in Loop: Header=BB0_526 Depth=3 + bnez t3, .LBB0_553 + j .LBB0_523 +.LBB0_556: # in Loop: Header=BB0_525 Depth=3 li a2, 1 - j .LBB0_559 -.LBB0_558: # in Loop: Header=BB0_526 Depth=3 + j .LBB0_558 +.LBB0_557: # in Loop: Header=BB0_525 Depth=3 li a2, -1 -.LBB0_559: # %if.then97.i - # in Loop: Header=BB0_526 Depth=3 +.LBB0_558: # %if.then97.i + # in Loop: Header=BB0_525 Depth=3 sw a5, 12(s10) sw a2, 24(s10) ld a2, 400(sp) # 8-byte Folded Reload addiw a2, a2, 1 sd a2, 400(sp) # 8-byte Folded Spill - j .LBB0_525 -.LBB0_560: # %for.end105.i - # in Loop: Header=BB0_521 Depth=2 + j .LBB0_524 +.LBB0_559: # %for.end105.i + # in Loop: Header=BB0_520 Depth=2 ld s11, 248(sp) # 8-byte Folded Reload ld s10, 264(sp) # 8-byte Folded Reload - ld s9, 168(sp) # 8-byte Folded Reload + ld s9, 200(sp) # 8-byte Folded Reload li a2, 1 ld a7, 128(sp) # 8-byte Folded Reload ld s8, 392(sp) # 8-byte Folded Reload ld t0, 376(sp) # 8-byte Folded Reload ld a3, 400(sp) # 8-byte Folded Reload - bne a3, a2, .LBB0_519 -# %bb.561: # %for.body112.lr.ph.i - # in Loop: Header=BB0_521 Depth=2 + bne a3, a2, .LBB0_518 +# %bb.560: # %for.body112.lr.ph.i + # in Loop: Header=BB0_520 Depth=2 addi a2, a0, 12 li a3, 28 mul a1, a1, a3 add a1, a2, a1 - j .LBB0_563 -.LBB0_562: # %for.inc169.i - # in Loop: Header=BB0_563 Depth=3 + j .LBB0_562 +.LBB0_561: # %for.inc169.i + # in Loop: Header=BB0_562 Depth=3 addi a2, a2, 28 - beq a2, a1, .LBB0_519 -.LBB0_563: # %for.body112.i + beq a2, a1, .LBB0_518 +.LBB0_562: # %for.body112.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_521 Depth=2 + # Parent Loop BB0_520 Depth=2 # => This Inner Loop Header: Depth=3 lw a3, 12(a2) - beqz a3, .LBB0_562 -# %bb.564: # %if.then118.i - # in Loop: Header=BB0_563 Depth=3 + beqz a3, .LBB0_561 +# %bb.563: # %if.then118.i + # in Loop: Header=BB0_562 Depth=3 lw a4, 32(s8) add a4, a4, a3 sw a4, 32(s8) @@ -4768,78 +4757,78 @@ sw a3, 4(t0) lw a3, -4(a2) sw a3, 0(t0) - j .LBB0_562 -.LBB0_565: # %for.end183.i563 + j .LBB0_561 +.LBB0_564: # %for.end183.i563 # in Loop: Header=BB0_6 Depth=1 lw a0, 32(s8) - beqz a0, .LBB0_567 -# %bb.566: # in Loop: Header=BB0_6 Depth=1 + beqz a0, .LBB0_566 +# %bb.565: # in Loop: Header=BB0_6 Depth=1 ld s7, 464(sp) # 8-byte Folded Reload - ld s1, 448(sp) # 8-byte Folded Reload + ld a5, 448(sp) # 8-byte Folded Reload sext.w a0, a2 - bgeu a0, s6, .LBB0_597 - j .LBB0_614 -.LBB0_567: # %for.cond188.preheader.i + bgeu a0, s6, .LBB0_596 + j .LBB0_613 +.LBB0_566: # %for.cond188.preheader.i # in Loop: Header=BB0_6 Depth=1 sext.w a0, a2 ld s7, 464(sp) # 8-byte Folded Reload - ld s1, 448(sp) # 8-byte Folded Reload - bltu a0, s6, .LBB0_594 -# %bb.568: # %for.body193.i.preheader + ld a5, 448(sp) # 8-byte Folded Reload + bltu a0, s6, .LBB0_593 +# %bb.567: # %for.body193.i.preheader # in Loop: Header=BB0_6 Depth=1 + li a7, 0 li a6, 0 - li a5, 0 - li a7, 1 + li t0, 1 # implicit-def: $x10 # kill: killed $x10 - j .LBB0_571 -.LBB0_569: # %if.then219.i - # in Loop: Header=BB0_571 Depth=2 + j .LBB0_570 +.LBB0_568: # %if.then219.i + # in Loop: Header=BB0_570 Depth=2 srli a3, a3, 2 - ld a0, 72(sp) # 8-byte Folded Reload + ld a0, 64(sp) # 8-byte Folded Reload and a0, a3, a0 - addw a6, a0, a6 -.LBB0_570: # %cleanup283.i - # in Loop: Header=BB0_571 Depth=2 + addw a7, a0, a7 +.LBB0_569: # %cleanup283.i + # in Loop: Header=BB0_570 Depth=2 ld s5, 440(sp) # 8-byte Folded Reload - addi a7, a7, 1 + addi t0, t0, 1 slli a0, a2, 32 srli a0, a0, 32 - bgeu a7, a0, .LBB0_595 -.LBB0_571: # %for.body193.i + bgeu t0, a0, .LBB0_594 +.LBB0_570: # %for.body193.i # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_578 Depth 3 - # Child Loop BB0_580 Depth 4 + # Child Loop BB0_577 Depth 3 + # Child Loop BB0_579 Depth 4 ld a0, 0(s5) - slli a1, a7, 3 + slli a1, t0, 3 add a0, a0, a1 ld s5, -8(a0) ld s4, 0(a0) lw a0, 12(s5) lw a1, 4(s4) addiw a3, a0, 1 - bltu a3, a1, .LBB0_570 -# %bb.572: # %if.end213.i - # in Loop: Header=BB0_571 Depth=2 + bltu a3, a1, .LBB0_569 +# %bb.571: # %if.end213.i + # in Loop: Header=BB0_570 Depth=2 lw a3, 28(s5) slli a4, a3, 62 srai a4, a4, 62 - bgtz a4, .LBB0_569 -# %bb.573: # %if.end224.i - # in Loop: Header=BB0_571 Depth=2 - bltz a4, .LBB0_591 -# %bb.574: # %if.end237.i - # in Loop: Header=BB0_571 Depth=2 + bgtz a4, .LBB0_568 +# %bb.572: # %if.end224.i + # in Loop: Header=BB0_570 Depth=2 + bltz a4, .LBB0_590 +# %bb.573: # %if.end237.i + # in Loop: Header=BB0_570 Depth=2 subw a0, a0, a1 addiw a1, a0, 2 sd a1, 416(sp) # 8-byte Folded Spill - beqz a1, .LBB0_592 -# %bb.575: # %for.body247.lr.ph.i - # in Loop: Header=BB0_571 Depth=2 - sd a7, 352(sp) # 8-byte Folded Spill - sd a6, 360(sp) # 8-byte Folded Spill - sd a5, 368(sp) # 8-byte Folded Spill + beqz a1, .LBB0_591 +# %bb.574: # %for.body247.lr.ph.i + # in Loop: Header=BB0_570 Depth=2 + sd t0, 352(sp) # 8-byte Folded Spill + sd a7, 360(sp) # 8-byte Folded Spill + sd a6, 368(sp) # 8-byte Folded Spill li s3, 0 li s10, 0 li s11, 0 @@ -4848,19 +4837,19 @@ li a0, -1 sd a0, 384(sp) # 8-byte Folded Spill sd s5, 400(sp) # 8-byte Folded Spill - j .LBB0_578 -.LBB0_576: # in Loop: Header=BB0_578 Depth=3 + j .LBB0_577 +.LBB0_575: # in Loop: Header=BB0_577 Depth=3 ld s8, 392(sp) # 8-byte Folded Reload -.LBB0_577: # %for.inc266.i - # in Loop: Header=BB0_578 Depth=3 +.LBB0_576: # %for.inc266.i + # in Loop: Header=BB0_577 Depth=3 addiw s3, s3, 1 ld a0, 416(sp) # 8-byte Folded Reload - beq s3, a0, .LBB0_590 -.LBB0_578: # %for.body247.i + beq s3, a0, .LBB0_589 +.LBB0_577: # %for.body247.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_571 Depth=2 + # Parent Loop BB0_570 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_580 Depth 4 + # Child Loop BB0_579 Depth 4 lw a0, 8(s5) lw a1, 12(s5) ld s1, 408(sp) # 8-byte Folded Reload @@ -4884,9 +4873,9 @@ sw zero, 492(sp) li a0, -1 sw a0, 484(sp) - beqz a1, .LBB0_582 -# %bb.579: # %for.body.us.us.i.preheader - # in Loop: Header=BB0_578 Depth=3 + beqz a1, .LBB0_581 +# %bb.578: # %for.body.us.us.i.preheader + # in Loop: Header=BB0_577 Depth=3 sd s11, 424(sp) # 8-byte Folded Spill sd s10, 432(sp) # 8-byte Folded Spill mv s5, s4 @@ -4896,10 +4885,10 @@ li s7, 4 ld s11, 464(sp) # 8-byte Folded Reload ld s10, 448(sp) # 8-byte Folded Reload -.LBB0_580: # %for.body.us.us.i +.LBB0_579: # %for.body.us.us.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_571 Depth=2 - # Parent Loop BB0_578 Depth=3 + # Parent Loop BB0_570 Depth=2 + # Parent Loop BB0_577 Depth=3 # => This Inner Loop Header: Depth=4 ld a0, 8(s6) add a0, a0, s7 @@ -4930,9 +4919,9 @@ addi s4, s4, 1 addi s7, s7, 8 addiw s9, s9, 1 - bltu s4, a0, .LBB0_580 -# %bb.581: # %compute_max_score.exit.loopexit - # in Loop: Header=BB0_578 Depth=3 + bltu s4, a0, .LBB0_579 +# %bb.580: # %compute_max_score.exit.loopexit + # in Loop: Header=BB0_577 Depth=3 lw s0, 488(sp) mv s9, s6 li s6, 2 @@ -4940,91 +4929,92 @@ ld s5, 400(sp) # 8-byte Folded Reload ld s10, 432(sp) # 8-byte Folded Reload ld s11, 424(sp) # 8-byte Folded Reload - j .LBB0_583 -.LBB0_582: # in Loop: Header=BB0_578 Depth=3 + j .LBB0_582 +.LBB0_581: # in Loop: Header=BB0_577 Depth=3 li s0, 0 -.LBB0_583: # %compute_max_score.exit - # in Loop: Header=BB0_578 Depth=3 +.LBB0_582: # %compute_max_score.exit + # in Loop: Header=BB0_577 Depth=3 mv a0, s8 call free ld s7, 464(sp) # 8-byte Folded Reload - ld s1, 448(sp) # 8-byte Folded Reload - bltu s0, s10, .LBB0_576 -# %bb.584: # %if.end.i142.i - # in Loop: Header=BB0_578 Depth=3 + ld a5, 448(sp) # 8-byte Folded Reload + bltu s0, s10, .LBB0_575 +# %bb.583: # %if.end.i142.i + # in Loop: Header=BB0_577 Depth=3 ld s8, 392(sp) # 8-byte Folded Reload - bgeu s10, s0, .LBB0_587 -# %bb.585: # %if.end.i142.if.then264_crit_edge.i - # in Loop: Header=BB0_578 Depth=3 + bgeu s10, s0, .LBB0_586 +# %bb.584: # %if.end.i142.if.then264_crit_edge.i + # in Loop: Header=BB0_577 Depth=3 lw a1, 484(sp) lw a0, 492(sp) -.LBB0_586: # %if.then264.i - # in Loop: Header=BB0_578 Depth=3 +.LBB0_585: # %if.then264.i + # in Loop: Header=BB0_577 Depth=3 lw a2, 496(sp) sd a2, 376(sp) # 8-byte Folded Spill mv s11, a0 mv s10, s0 sd a1, 384(sp) # 8-byte Folded Spill - j .LBB0_577 -.LBB0_587: # %if.end6.i.i - # in Loop: Header=BB0_578 Depth=3 + j .LBB0_576 +.LBB0_586: # %if.end6.i.i + # in Loop: Header=BB0_577 Depth=3 lw a0, 492(sp) - bltu a0, s11, .LBB0_577 -# %bb.588: # %if.end10.i.i - # in Loop: Header=BB0_578 Depth=3 + bltu a0, s11, .LBB0_576 +# %bb.587: # %if.end10.i.i + # in Loop: Header=BB0_577 Depth=3 lw a1, 484(sp) - bltu s11, a0, .LBB0_586 -# %bb.589: # %if.end10.i.i - # in Loop: Header=BB0_578 Depth=3 + bltu s11, a0, .LBB0_585 +# %bb.588: # %if.end10.i.i + # in Loop: Header=BB0_577 Depth=3 ld a2, 384(sp) # 8-byte Folded Reload - bgeu a1, a2, .LBB0_577 - j .LBB0_586 -.LBB0_590: # %for.end268.loopexit.i - # in Loop: Header=BB0_571 Depth=2 + bgeu a1, a2, .LBB0_576 + j .LBB0_585 +.LBB0_589: # %for.end268.loopexit.i + # in Loop: Header=BB0_570 Depth=2 lw a2, 16(s8) - ld a5, 368(sp) # 8-byte Folded Reload - ld a6, 360(sp) # 8-byte Folded Reload - ld a7, 352(sp) # 8-byte Folded Reload - j .LBB0_593 -.LBB0_591: # %if.then231.i - # in Loop: Header=BB0_571 Depth=2 + ld a6, 368(sp) # 8-byte Folded Reload + ld a7, 360(sp) # 8-byte Folded Reload + ld t0, 352(sp) # 8-byte Folded Reload + j .LBB0_592 +.LBB0_590: # %if.then231.i + # in Loop: Header=BB0_570 Depth=2 srli a3, a3, 2 - ld a0, 72(sp) # 8-byte Folded Reload + ld a0, 64(sp) # 8-byte Folded Reload and a0, a3, a0 - addw a5, a0, a5 - j .LBB0_570 -.LBB0_592: # in Loop: Header=BB0_571 Depth=2 + addw a6, a0, a6 + j .LBB0_569 +.LBB0_591: # in Loop: Header=BB0_570 Depth=2 li s10, 0 -.LBB0_593: # %for.end268.i - # in Loop: Header=BB0_571 Depth=2 +.LBB0_592: # %for.end268.i + # in Loop: Header=BB0_570 Depth=2 ld a1, 376(sp) # 8-byte Folded Reload sext.w a0, a1 sgtz a0, a0 negw a0, a0 and a0, a0, s10 - addw a6, a0, a6 + addw a7, a0, a7 sraiw a0, a1, 31 and a0, a0, s10 - addw a5, a0, a5 + addw a6, a0, a6 ld s11, 248(sp) # 8-byte Folded Reload - j .LBB0_570 -.LBB0_594: # in Loop: Header=BB0_6 Depth=1 - li a5, 0 + j .LBB0_569 +.LBB0_593: # in Loop: Header=BB0_6 Depth=1 li a6, 0 - j .LBB0_596 -.LBB0_595: # in Loop: Header=BB0_6 Depth=1 + li a7, 0 + j .LBB0_595 +.LBB0_594: # in Loop: Header=BB0_6 Depth=1 ld s10, 264(sp) # 8-byte Folded Reload -.LBB0_596: # %for.end293.i +.LBB0_595: # %for.end293.i # in Loop: Header=BB0_6 Depth=1 - sltu a0, a6, a5 + sltu a0, a7, a6 negw a0, a0 ori a0, a0, 1 ld a1, 456(sp) # 8-byte Folded Reload sw a0, 0(a1) sext.w a0, a2 - bltu a0, s6, .LBB0_614 -.LBB0_597: # %for.body307.i.preheader + bltu a0, s6, .LBB0_613 +.LBB0_596: # %for.body307.i.preheader # in Loop: Header=BB0_6 Depth=1 + mv s6, a5 li s0, 1 # implicit-def: $x10 # kill: killed $x10 @@ -5034,12 +5024,12 @@ # kill: killed $x10 # implicit-def: $x10 # kill: killed $x10 - j .LBB0_601 -.LBB0_598: # in Loop: Header=BB0_601 Depth=2 + j .LBB0_600 +.LBB0_597: # in Loop: Header=BB0_600 Depth=2 li s5, 0 li s2, -1 -.LBB0_599: # %for.end375.i - # in Loop: Header=BB0_601 Depth=2 +.LBB0_598: # %for.end375.i + # in Loop: Header=BB0_600 Depth=2 ld a0, 424(sp) # 8-byte Folded Reload andi a0, a0, 3 slli s2, s2, 24 @@ -5047,12 +5037,11 @@ srli a1, s5, 40 or a0, s2, a0 or a0, a0, a1 - ld a1, 416(sp) # 8-byte Folded Reload - sw a0, 28(a1) + sw a0, 28(s1) ld a0, 464(sp) # 8-byte Folded Reload - sw a0, 8(a1) + sw a0, 8(s1) ld a0, 448(sp) # 8-byte Folded Reload - sw a0, 12(a1) + sw a0, 12(s1) addi a0, a0, 1 sw a0, 4(s9) ld a0, 432(sp) # 8-byte Folded Reload @@ -5062,58 +5051,57 @@ ld s11, 248(sp) # 8-byte Folded Reload ld s10, 264(sp) # 8-byte Folded Reload ld s5, 440(sp) # 8-byte Folded Reload -.LBB0_600: # %cleanup409.i - # in Loop: Header=BB0_601 Depth=2 +.LBB0_599: # %cleanup409.i + # in Loop: Header=BB0_600 Depth=2 addi s0, s0, 1 slli a0, a2, 32 srli a0, a0, 32 - bgeu s0, a0, .LBB0_614 -.LBB0_601: # %for.body307.i + bgeu s0, a0, .LBB0_613 +.LBB0_600: # %for.body307.i # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_609 Depth 3 + # Child Loop BB0_608 Depth 3 ld a0, 0(s5) slli a1, s0, 3 add a1, a0, a1 - ld a4, -8(a1) - lw a0, 28(a4) + ld s1, -8(a1) + lw a0, 28(s1) ld s9, 0(a1) - bltz a0, .LBB0_603 -# %bb.602: # %land.lhs.true.i567 - # in Loop: Header=BB0_601 Depth=2 + bltz a0, .LBB0_602 +# %bb.601: # %land.lhs.true.i567 + # in Loop: Header=BB0_600 Depth=2 ld a1, 456(sp) # 8-byte Folded Reload lw a1, 0(a1) slli a0, a0, 62 srai a0, a0, 62 mulw a0, a1, a0 - bgtz a0, .LBB0_600 -.LBB0_603: # %lor.lhs.false.i566 - # in Loop: Header=BB0_601 Depth=2 - lw a0, 12(a4) + bgtz a0, .LBB0_599 +.LBB0_602: # %lor.lhs.false.i566 + # in Loop: Header=BB0_600 Depth=2 + lw a0, 12(s1) lw a1, 4(s9) addiw a3, a0, 1 - bltu a3, a1, .LBB0_600 -# %bb.604: # %if.end342.i - # in Loop: Header=BB0_601 Depth=2 + bltu a3, a1, .LBB0_599 +# %bb.603: # %if.end342.i + # in Loop: Header=BB0_600 Depth=2 subw a0, a0, a1 addiw s10, a0, 2 - sd a4, 416(sp) # 8-byte Folded Spill - beqz s10, .LBB0_598 -# %bb.605: # %for.body353.lr.ph.i - # in Loop: Header=BB0_601 Depth=2 + beqz s10, .LBB0_597 +# %bb.604: # %for.body353.lr.ph.i + # in Loop: Header=BB0_600 Depth=2 li s11, 0 li s5, 0 li s4, 0 - addi s8, a4, 8 + addi s8, s1, 8 not s3, a0 li s2, -1 - j .LBB0_609 -.LBB0_606: # %if.end.i147.if.then371_crit_edge.i - # in Loop: Header=BB0_609 Depth=3 + j .LBB0_608 +.LBB0_605: # %if.end.i147.if.then371_crit_edge.i + # in Loop: Header=BB0_608 Depth=3 lw a2, 484(sp) lw a1, 492(sp) -.LBB0_607: # %if.then371.i - # in Loop: Header=BB0_609 Depth=3 +.LBB0_606: # %if.then371.i + # in Loop: Header=BB0_608 Depth=3 lw a3, 472(sp) sd a3, 464(sp) # 8-byte Folded Spill lw a3, 476(sp) @@ -5125,13 +5113,13 @@ mv s4, a1 mv s5, a0 mv s2, a2 -.LBB0_608: # %for.inc373.i - # in Loop: Header=BB0_609 Depth=3 +.LBB0_607: # %for.inc373.i + # in Loop: Header=BB0_608 Depth=3 addiw s11, s11, 1 - beq s10, s11, .LBB0_599 -.LBB0_609: # %for.body353.i + beq s10, s11, .LBB0_598 +.LBB0_608: # %for.body353.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_601 Depth=2 + # Parent Loop BB0_600 Depth=2 # => This Inner Loop Header: Depth=3 vsetivli zero, 2, e32, mf2, ta, ma vle32.v v8, (s8) @@ -5146,27 +5134,28 @@ lw a3, 0(a0) addi a2, sp, 472 mv a0, s7 - mv a1, s1 + mv a1, s6 call compute_max_score lw a0, 488(sp) - bltu a0, s5, .LBB0_608 -# %bb.610: # %if.end.i147.i - # in Loop: Header=BB0_609 Depth=3 - bltu s5, a0, .LBB0_606 -# %bb.611: # %if.end6.i149.i - # in Loop: Header=BB0_609 Depth=3 + bltu a0, s5, .LBB0_607 +# %bb.609: # %if.end.i147.i + # in Loop: Header=BB0_608 Depth=3 + bltu s5, a0, .LBB0_605 +# %bb.610: # %if.end6.i149.i + # in Loop: Header=BB0_608 Depth=3 lw a1, 492(sp) - bltu a1, s4, .LBB0_608 -# %bb.612: # %if.end10.i153.i - # in Loop: Header=BB0_609 Depth=3 + bltu a1, s4, .LBB0_607 +# %bb.611: # %if.end10.i153.i + # in Loop: Header=BB0_608 Depth=3 lw a2, 484(sp) - bltu s4, a1, .LBB0_607 -# %bb.613: # %if.end10.i153.i - # in Loop: Header=BB0_609 Depth=3 - bgeu a2, s2, .LBB0_608 - j .LBB0_607 -.LBB0_614: # %slide_intron.exit + bltu s4, a1, .LBB0_606 +# %bb.612: # %if.end10.i153.i + # in Loop: Header=BB0_608 Depth=3 + bgeu a2, s2, .LBB0_607 + j .LBB0_606 +.LBB0_613: # %slide_intron.exit # in Loop: Header=BB0_6 Depth=1 + li s6, 2 ld s0, 8(s11) lw a3, 16(s11) ld s1, 16(s10) @@ -5180,13 +5169,12 @@ sw zero, 480(sp) addiw a2, a2, -1 sw zero, 484(sp) - bltz a2, .LBB0_661 -# %bb.615: # %for.body.preheader.i601 + bltz a2, .LBB0_660 +# %bb.614: # %for.body.preheader.i601 # in Loop: Header=BB0_6 Depth=1 - li s7, 2 li a0, 0 li s3, 0 - li s6, 0 + li s7, 0 li s10, 0 addi a4, s0, -1 sd a4, 432(sp) # 8-byte Folded Spill @@ -5197,12 +5185,12 @@ mv s8, a3 sd a1, 464(sp) # 8-byte Folded Spill sd a1, 448(sp) # 8-byte Folded Spill - j .LBB0_618 -.LBB0_616: # in Loop: Header=BB0_618 Depth=2 + j .LBB0_617 +.LBB0_615: # in Loop: Header=BB0_617 Depth=2 li a6, 0 li a3, 0 -.LBB0_617: # %cleanup.i622 - # in Loop: Header=BB0_618 Depth=2 +.LBB0_616: # %cleanup.i622 + # in Loop: Header=BB0_617 Depth=2 lw a0, 8(s11) lw a4, 12(s11) add a1, a2, a1 @@ -5222,17 +5210,17 @@ mul a2, a2, a4 divw a1, a2, a1 sw a1, 16(s11) - sd s6, 0(a3) - ld s6, 568(sp) + sd s7, 0(a3) + ld s7, 568(sp) addi a2, s9, -1 mv s5, s11 - blez s9, .LBB0_662 -.LBB0_618: # %for.body.i602 + blez s9, .LBB0_661 +.LBB0_617: # %for.body.i602 # Parent Loop BB0_6 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_647 Depth 3 - # Child Loop BB0_655 Depth 4 - # Child Loop BB0_658 Depth 4 + # Child Loop BB0_646 Depth 3 + # Child Loop BB0_654 Depth 4 + # Child Loop BB0_657 Depth 4 ld a1, 440(sp) # 8-byte Folded Reload ld a1, 0(a1) mv s9, a2 @@ -5242,41 +5230,41 @@ lw a5, 12(s11) lw a1, 4(s5) addiw a2, a5, 1 - bne a1, a2, .LBB0_622 -# %bb.619: # %if.else.i635 - # in Loop: Header=BB0_618 Depth=2 + bne a1, a2, .LBB0_621 +# %bb.618: # %if.else.i635 + # in Loop: Header=BB0_617 Depth=2 li s4, 1 lw a4, 8(s11) lw a1, 0(s5) not a2, a4 addw s2, a1, a2 - beqz s2, .LBB0_626 -# %bb.620: # %land.lhs.true.i636 - # in Loop: Header=BB0_618 Depth=2 - beqz a0, .LBB0_625 -# %bb.621: # %if.then26.i - # in Loop: Header=BB0_618 Depth=2 + beqz s2, .LBB0_625 +# %bb.619: # %land.lhs.true.i636 + # in Loop: Header=BB0_617 Depth=2 + beqz a0, .LBB0_624 +# %bb.620: # %if.then26.i + # in Loop: Header=BB0_617 Depth=2 li a0, 16 call xmalloc sb s4, 12(a0) sw s2, 8(a0) - sd s6, 0(a0) + sd s7, 0(a0) lw a4, 8(s11) lw a5, 12(s11) - mv s6, a0 - j .LBB0_626 -.LBB0_622: # %if.then.i606 - # in Loop: Header=BB0_618 Depth=2 - beqz a0, .LBB0_624 -# %bb.623: # %if.then8.i608 - # in Loop: Header=BB0_618 Depth=2 + mv s7, a0 + j .LBB0_625 +.LBB0_621: # %if.then.i606 + # in Loop: Header=BB0_617 Depth=2 + beqz a0, .LBB0_623 +# %bb.622: # %if.then8.i608 + # in Loop: Header=BB0_617 Depth=2 li a0, 40 call xmalloc ld a2, 392(sp) # 8-byte Folded Reload ld a1, 0(a2) sd a1, 0(a0) sd a0, 0(a2) - sd s6, 8(a0) + sd s7, 8(a0) lw a1, 0(s5) sw a1, 16(a0) lw a2, 4(s5) @@ -5290,17 +5278,17 @@ sw a1, 28(a0) sw s3, 32(a0) lw a5, 12(s11) - li s6, 0 + li s7, 0 li s3, 0 -.LBB0_624: # %if.end.i613 - # in Loop: Header=BB0_618 Depth=2 +.LBB0_623: # %if.end.i613 + # in Loop: Header=BB0_617 Depth=2 lw a4, 8(s11) sd a5, 448(sp) # 8-byte Folded Spill -.LBB0_625: # %if.end34.i - # in Loop: Header=BB0_618 Depth=2 +.LBB0_624: # %if.end34.i + # in Loop: Header=BB0_617 Depth=2 mv s8, a4 -.LBB0_626: # %if.end34.i - # in Loop: Header=BB0_618 Depth=2 +.LBB0_625: # %if.end34.i + # in Loop: Header=BB0_617 Depth=2 lw a3, 4(s11) lw a2, 0(s11) subw a0, a5, a3 @@ -5319,9 +5307,9 @@ mv a0, s0 mv a1, s1 call align_get_dist - bltz a0, .LBB0_660 -# %bb.627: # %if.end58.i - # in Loop: Header=BB0_618 Depth=2 + bltz a0, .LBB0_659 +# %bb.626: # %if.end58.i + # in Loop: Header=BB0_617 Depth=2 mv s2, a0 lw a2, 0(s11) lw a3, 4(s11) @@ -5341,56 +5329,46 @@ mv a6, s2 call align_path ld a0, 560(sp) - beqz a0, .LBB0_660 -# %bb.628: # %if.end68.i - # in Loop: Header=BB0_618 Depth=2 + beqz a0, .LBB0_659 +# %bb.627: # %if.end68.i + # in Loop: Header=BB0_617 Depth=2 addi a0, sp, 568 addi a1, sp, 560 addi a2, sp, 552 call Condense_both_Ends lw a0, 8(s5) - beqz a0, .LBB0_630 -# %bb.629: # in Loop: Header=BB0_618 Depth=2 - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 576 - vl2r.v v16, (a0) # Unknown-size Folded Reload + beqz a0, .LBB0_629 +# %bb.628: # in Loop: Header=BB0_617 Depth=2 li t6, 1 li s5, 3 ld a0, 568(sp) - beqz s9, .LBB0_636 - j .LBB0_641 -.LBB0_630: # %land.lhs.true71.i - # in Loop: Header=BB0_618 Depth=2 + beqz s9, .LBB0_635 + j .LBB0_640 +.LBB0_629: # %land.lhs.true71.i + # in Loop: Header=BB0_617 Depth=2 ld a0, 560(sp) lbu a1, 12(a0) - csrr a2, vlenb - slli a2, a2, 1 - add a2, sp, a2 - addi a2, a2, 576 - vl2r.v v16, (a2) # Unknown-size Folded Reload li t6, 1 - bne a1, t6, .LBB0_635 -# %bb.631: # %if.then76.i - # in Loop: Header=BB0_618 Depth=2 + bne a1, t6, .LBB0_634 +# %bb.630: # %if.then76.i + # in Loop: Header=BB0_617 Depth=2 lw a1, 8(a0) lw a2, 8(s11) subw a2, a2, a1 sw a2, 8(s11) li s5, 3 - beqz s6, .LBB0_634 -# %bb.632: # %land.lhs.true86.i - # in Loop: Header=BB0_618 Depth=2 - lbu a2, 12(s6) - bne a2, t6, .LBB0_634 -# %bb.633: # %if.then91.i634 - # in Loop: Header=BB0_618 Depth=2 - lw a2, 8(s6) + beqz s7, .LBB0_633 +# %bb.631: # %land.lhs.true86.i + # in Loop: Header=BB0_617 Depth=2 + lbu a2, 12(s7) + bne a2, t6, .LBB0_633 +# %bb.632: # %if.then91.i634 + # in Loop: Header=BB0_617 Depth=2 + lw a2, 8(s7) add a2, a2, a1 - sw a2, 8(s6) -.LBB0_634: # %if.end95.i - # in Loop: Header=BB0_618 Depth=2 + sw a2, 8(s7) +.LBB0_633: # %if.end95.i + # in Loop: Header=BB0_617 Depth=2 subw s2, s2, a1 subw s8, s8, a1 call free @@ -5399,69 +5377,59 @@ sd zero, 0(a0) ld a0, 552(sp) sd a0, 560(sp) - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 576 - vl2r.v v16, (a0) # Unknown-size Folded Reload ld a0, 568(sp) - beqz s9, .LBB0_636 - j .LBB0_641 -.LBB0_635: # in Loop: Header=BB0_618 Depth=2 + beqz s9, .LBB0_635 + j .LBB0_640 +.LBB0_634: # in Loop: Header=BB0_617 Depth=2 li s5, 3 ld a0, 568(sp) - bnez s9, .LBB0_641 -.LBB0_636: # %if.end97.i - # in Loop: Header=BB0_618 Depth=2 - beqz a0, .LBB0_641 -# %bb.637: # %land.lhs.true102.i - # in Loop: Header=BB0_618 Depth=2 + bnez s9, .LBB0_640 +.LBB0_635: # %if.end97.i + # in Loop: Header=BB0_617 Depth=2 + beqz a0, .LBB0_640 +# %bb.636: # %land.lhs.true102.i + # in Loop: Header=BB0_617 Depth=2 lbu a1, 12(a0) - bne a1, t6, .LBB0_643 -# %bb.638: # %if.then107.i631 - # in Loop: Header=BB0_618 Depth=2 + bne a1, t6, .LBB0_642 +# %bb.637: # %if.then107.i631 + # in Loop: Header=BB0_617 Depth=2 lw a1, 8(a0) lw a2, 0(s11) add a2, a2, a1 sw a2, 0(s11) ld a2, 560(sp) ld s4, 0(a0) - bne a2, a0, .LBB0_640 -# %bb.639: # %if.then117.i - # in Loop: Header=BB0_618 Depth=2 + bne a2, a0, .LBB0_639 +# %bb.638: # %if.then117.i + # in Loop: Header=BB0_617 Depth=2 sd s4, 560(sp) -.LBB0_640: # %if.end118.i - # in Loop: Header=BB0_618 Depth=2 +.LBB0_639: # %if.end118.i + # in Loop: Header=BB0_617 Depth=2 subw s2, s2, a1 call free sd s4, 568(sp) - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 576 - vl2r.v v16, (a0) # Unknown-size Folded Reload li t6, 1 lw a1, 0(s11) lw a2, 4(s11) add s3, s2, s3 - bnez s4, .LBB0_642 - j .LBB0_616 -.LBB0_641: # in Loop: Header=BB0_618 Depth=2 + bnez s4, .LBB0_641 + j .LBB0_615 +.LBB0_640: # in Loop: Header=BB0_617 Depth=2 mv s4, a0 lw a1, 0(s11) lw a2, 4(s11) add s3, s2, s3 - beqz a0, .LBB0_616 -.LBB0_642: # in Loop: Header=BB0_618 Depth=2 + beqz a0, .LBB0_615 +.LBB0_641: # in Loop: Header=BB0_617 Depth=2 mv a0, s4 - j .LBB0_644 -.LBB0_643: # %if.end119.thread.i - # in Loop: Header=BB0_618 Depth=2 + j .LBB0_643 +.LBB0_642: # %if.end119.thread.i + # in Loop: Header=BB0_617 Depth=2 lw a1, 0(s11) lw a2, 4(s11) add s3, s2, s3 -.LBB0_644: # %while.body.preheader.i - # in Loop: Header=BB0_618 Depth=2 +.LBB0_643: # %while.body.preheader.i + # in Loop: Header=BB0_617 Depth=2 ld s2, 224(sp) # 8-byte Folded Reload li a3, 0 li a6, 0 @@ -5473,52 +5441,52 @@ srli a5, a5, 32 ld a7, 432(sp) # 8-byte Folded Reload add a5, a7, a5 - j .LBB0_647 -.LBB0_645: # %sw.bb137.i - # in Loop: Header=BB0_647 Depth=3 + j .LBB0_646 +.LBB0_644: # %sw.bb137.i + # in Loop: Header=BB0_646 Depth=3 lw a7, 8(a0) add a3, a7, a3 add a6, a7, a6 add a4, a4, a7 -.LBB0_646: # %sw.epilog.i - # in Loop: Header=BB0_647 Depth=3 +.LBB0_645: # %sw.epilog.i + # in Loop: Header=BB0_646 Depth=3 ld a0, 0(a0) - beqz a0, .LBB0_617 -.LBB0_647: # %while.body.i621 + beqz a0, .LBB0_616 +.LBB0_646: # %while.body.i621 # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_618 Depth=2 + # Parent Loop BB0_617 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_655 Depth 4 - # Child Loop BB0_658 Depth 4 + # Child Loop BB0_654 Depth 4 + # Child Loop BB0_657 Depth 4 lbu a7, 12(a0) - beq a7, t6, .LBB0_653 + beq a7, t6, .LBB0_652 +# %bb.647: # %while.body.i621 + # in Loop: Header=BB0_646 Depth=3 + beq a7, s6, .LBB0_644 # %bb.648: # %while.body.i621 - # in Loop: Header=BB0_647 Depth=3 - beq a7, s7, .LBB0_645 -# %bb.649: # %while.body.i621 - # in Loop: Header=BB0_647 Depth=3 - bne a7, s5, .LBB0_646 -# %bb.650: # %for.cond146.preheader.i - # in Loop: Header=BB0_647 Depth=3 + # in Loop: Header=BB0_646 Depth=3 + bne a7, s5, .LBB0_645 +# %bb.649: # %for.cond146.preheader.i + # in Loop: Header=BB0_646 Depth=3 lw a7, 8(a0) - blez a7, .LBB0_646 -# %bb.651: # %for.body150.preheader.i - # in Loop: Header=BB0_647 Depth=3 - bgeu a7, s2, .LBB0_654 -# %bb.652: # in Loop: Header=BB0_647 Depth=3 + blez a7, .LBB0_645 +# %bb.650: # %for.body150.preheader.i + # in Loop: Header=BB0_646 Depth=3 + bgeu a7, s2, .LBB0_653 +# %bb.651: # in Loop: Header=BB0_646 Depth=3 li t2, 0 mv t0, a4 mv t1, a5 - j .LBB0_657 -.LBB0_653: # %sw.bb.i - # in Loop: Header=BB0_647 Depth=3 + j .LBB0_656 +.LBB0_652: # %sw.bb.i + # in Loop: Header=BB0_646 Depth=3 lw a7, 8(a0) add a3, a7, a3 add a6, a7, a6 add a5, a5, a7 - j .LBB0_646 -.LBB0_654: # %vector.ph - # in Loop: Header=BB0_647 Depth=3 + j .LBB0_645 +.LBB0_653: # %vector.ph + # in Loop: Header=BB0_646 Depth=3 ld t0, 312(sp) # 8-byte Folded Reload srli t0, t0, 3 slli t1, t0, 2 @@ -5530,60 +5498,61 @@ slli t0, t0, 2 add t0, sp, t0 addi t0, t0, 576 - vl1r.v v9, (t0) # Unknown-size Folded Reload - vmv1r.v v8, v9 - vmv.s.x v8, a6 - vmv2r.v v10, v16 + vl1r.v v12, (t0) # Unknown-size Folded Reload + vmv1r.v v10, v12 + vmv.s.x v10, a6 + csrr a6, vlenb + slli a6, a6, 1 + add a6, sp, a6 + addi a6, a6, 576 + vl2r.v v14, (a6) # Unknown-size Folded Reload + vmv2r.v v8, v14 add t0, a4, t2 - vmv1r.v v10, v8 - vmv1r.v v8, v9 - vmv.s.x v8, s10 - vmv2r.v v12, v16 + vmv1r.v v8, v10 + vmv.s.x v12, s10 + vmv2r.v v10, v14 add t1, a5, t2 - vmv1r.v v12, v8 + vmv1r.v v10, v12 mv a6, t2 mv t3, a4 mv t4, a5 -.LBB0_655: # %vector.body +.LBB0_654: # %vector.body # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_618 Depth=2 - # Parent Loop BB0_647 Depth=3 + # Parent Loop BB0_617 Depth=2 + # Parent Loop BB0_646 Depth=3 # => This Inner Loop Header: Depth=4 vsetvli t5, zero, e8, mf2, ta, ma - vle8.v v9, (t4) - vle8.v v14, (t3) - vmsne.vv v8, v9, v14 - vmseq.vv v0, v9, v14 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v14, v16, 1, v0 - vadd.vv v12, v12, v14 - vmv1r.v v0, v8 - vmerge.vim v8, v16, 1, v0 - vadd.vv v10, v10, v8 + vle8.v v12, (t4) + vle8.v v13, (t3) + vmseq.vv v0, v12, v13 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v10, v10, 1, v0.t + vadd.vi v12, v8, 1 + vmerge.vvm v8, v12, v8, v0 add t4, t4, s2 sub a6, a6, s2 add t3, t3, s2 - bnez a6, .LBB0_655 -# %bb.656: # %middle.block - # in Loop: Header=BB0_647 Depth=3 - vmv.s.x v8, zero - vredsum.vs v9, v10, v8 - vmv.x.s a6, v9 - vredsum.vs v8, v12, v8 + bnez a6, .LBB0_654 +# %bb.655: # %middle.block + # in Loop: Header=BB0_646 Depth=3 + vmv.s.x v12, zero + vredsum.vs v8, v8, v12 + vmv.x.s a6, v8 + vredsum.vs v8, v10, v12 vmv.x.s s10, v8 - beq t2, a7, .LBB0_659 -.LBB0_657: # %for.body150.i.preheader - # in Loop: Header=BB0_647 Depth=3 + beq t2, a7, .LBB0_658 +.LBB0_656: # %for.body150.i.preheader + # in Loop: Header=BB0_646 Depth=3 not t2, t2 add t2, t2, a7 slli t2, t2, 32 srli t2, t2, 32 add t2, t1, t2 addi t2, t2, 1 -.LBB0_658: # %for.body150.i +.LBB0_657: # %for.body150.i # Parent Loop BB0_6 Depth=1 - # Parent Loop BB0_618 Depth=2 - # Parent Loop BB0_647 Depth=3 + # Parent Loop BB0_617 Depth=2 + # Parent Loop BB0_646 Depth=3 # => This Inner Loop Header: Depth=4 lbu t3, 0(t1) lbu t4, 0(t0) @@ -5594,42 +5563,41 @@ add a6, a6, t4 addi t1, t1, 1 addi t0, t0, 1 - bne t1, t2, .LBB0_658 -.LBB0_659: # %sw.epilog.loopexit.i - # in Loop: Header=BB0_647 Depth=3 + bne t1, t2, .LBB0_657 +.LBB0_658: # %sw.epilog.loopexit.i + # in Loop: Header=BB0_646 Depth=3 addiw a7, a7, -1 add a4, a4, a7 add a5, a5, a7 addi a5, a5, 1 addi a4, a4, 1 - j .LBB0_646 -.LBB0_660: # %pluri_align.exit.thread + j .LBB0_645 +.LBB0_659: # %pluri_align.exit.thread # in Loop: Header=BB0_6 Depth=1 ld s11, 248(sp) # 8-byte Folded Reload - ld s7, 64(sp) # 8-byte Folded Reload ld s10, 264(sp) # 8-byte Folded Reload - ld s9, 168(sp) # 8-byte Folded Reload + ld s9, 200(sp) # 8-byte Folded Reload li s6, 2 ld s8, 392(sp) # 8-byte Folded Reload - j .LBB0_670 -.LBB0_661: # in Loop: Header=BB0_6 Depth=1 - ld s7, 64(sp) # 8-byte Folded Reload - ld s9, 168(sp) # 8-byte Folded Reload + j .LBB0_669 +.LBB0_660: # in Loop: Header=BB0_6 Depth=1 + ld s9, 200(sp) # 8-byte Folded Reload + li s6, 2 lw a0, 20(s9) sw zero, 36(s8) ld s10, 264(sp) # 8-byte Folded Reload - beqz a0, .LBB0_670 + beqz a0, .LBB0_669 j .LBB0_5 -.LBB0_662: # %for.end189.i +.LBB0_661: # %for.end189.i # in Loop: Header=BB0_6 Depth=1 lw a0, 4(s11) addiw a0, a0, -1 ld a1, 464(sp) # 8-byte Folded Reload - beqz a0, .LBB0_665 -# %bb.663: # %for.end189.i + beqz a0, .LBB0_664 +# %bb.662: # %for.end189.i # in Loop: Header=BB0_6 Depth=1 - beq a0, a1, .LBB0_665 -# %bb.664: # %if.then197.i + beq a0, a1, .LBB0_664 +# %bb.663: # %if.then197.i # in Loop: Header=BB0_6 Depth=1 li a0, 40 call xmalloc @@ -5647,19 +5615,19 @@ ld a1, 448(sp) # 8-byte Folded Reload subw a1, a1, a2 addi a1, a1, 1 - ld s7, 64(sp) # 8-byte Folded Reload - ld s9, 168(sp) # 8-byte Folded Reload - j .LBB0_668 -.LBB0_665: # %if.else215.i + ld s9, 200(sp) # 8-byte Folded Reload + li s6, 2 + j .LBB0_667 +.LBB0_664: # %if.else215.i # in Loop: Header=BB0_6 Depth=1 li s0, 1 - ld s9, 168(sp) # 8-byte Folded Reload - bne a0, a1, .LBB0_667 -# %bb.666: # in Loop: Header=BB0_6 Depth=1 + ld s9, 200(sp) # 8-byte Folded Reload + bne a0, a1, .LBB0_666 +# %bb.665: # in Loop: Header=BB0_6 Depth=1 ld s11, 248(sp) # 8-byte Folded Reload - ld s7, 64(sp) # 8-byte Folded Reload - j .LBB0_669 -.LBB0_667: # %if.then218.i + li s6, 2 + j .LBB0_668 +.LBB0_666: # %if.then218.i # in Loop: Header=BB0_6 Depth=1 li a0, 40 call xmalloc @@ -5673,29 +5641,28 @@ subw a1, s8, a1 addi a1, a1, 1 sw a1, 24(a0) - ld s7, 64(sp) # 8-byte Folded Reload + li s6, 2 ld a1, 448(sp) # 8-byte Folded Reload -.LBB0_668: # %if.end236.sink.split.i +.LBB0_667: # %if.end236.sink.split.i # in Loop: Header=BB0_6 Depth=1 sw a1, 28(a0) - sd s6, 8(a0) + sd s7, 8(a0) sw s3, 32(a0) ld s11, 248(sp) # 8-byte Folded Reload -.LBB0_669: # %pluri_align.exit +.LBB0_668: # %pluri_align.exit # in Loop: Header=BB0_6 Depth=1 - li s6, 2 ld s8, 392(sp) # 8-byte Folded Reload lw a0, 20(s9) sw s10, 36(s8) ld s10, 264(sp) # 8-byte Folded Reload - beqz a0, .LBB0_670 + beqz a0, .LBB0_669 j .LBB0_5 -.LBB0_670: # %if.then595 +.LBB0_669: # %if.then595 # in Loop: Header=BB0_6 Depth=1 ld s0, 0(s8) - bnez s0, .LBB0_671 + bnez s0, .LBB0_670 j .LBB0_4 -.LBB0_671: # %while.body.i637 +.LBB0_670: # %while.body.i637 # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 ld s1, 0(s0) @@ -5704,24 +5671,23 @@ mv a0, s0 call free mv s0, s1 - bnez s1, .LBB0_671 + bnez s1, .LBB0_670 j .LBB0_4 -.LBB0_672: # %while.end245.loopexit.split.loop.exit323.i +.LBB0_671: # %while.end245.loopexit.split.loop.exit323.i # in Loop: Header=BB0_6 Depth=1 mv s6, t3 -.LBB0_673: # %while.end245.i +.LBB0_672: # %while.end245.i # in Loop: Header=BB0_6 Depth=1 sext.w a0, s6 slti a0, a0, 1 negw a0, a0 addi a1, s6, -1 and a0, a0, a1 - ld s11, 248(sp) # 8-byte Folded Reload ld s10, 264(sp) # 8-byte Folded Reload li a4, 1 ld s5, 440(sp) # 8-byte Folded Reload ld s2, 456(sp) # 8-byte Folded Reload -.LBB0_674: # %land.rhs249.i +.LBB0_673: # %land.rhs249.i # Parent Loop BB0_6 Depth=1 # => This Inner Loop Header: Depth=2 sext.w a1, s6 @@ -5731,24 +5697,24 @@ lw a2, -4(a2) subw a3, a3, a2 li a2, 2 - blt a2, a3, .LBB0_677 -# %bb.675: # %while.body259.i - # in Loop: Header=BB0_674 Depth=2 + blt a2, a3, .LBB0_676 +# %bb.674: # %while.body259.i + # in Loop: Header=BB0_673 Depth=2 addi s6, s6, -1 - blt a4, a1, .LBB0_674 -# %bb.676: # in Loop: Header=BB0_6 Depth=1 + blt a4, a1, .LBB0_673 +# %bb.675: # in Loop: Header=BB0_6 Depth=1 mv s6, a0 -.LBB0_677: # %while.end261.i +.LBB0_676: # %while.end261.i # in Loop: Header=BB0_6 Depth=1 sext.w a0, s6 slli a0, a0, 2 add a1, t0, a0 lw s1, 0(a1) - ld s4, 424(sp) # 8-byte Folded Reload + ld s4, 416(sp) # 8-byte Folded Reload add a0, s4, a0 lw s0, 0(a0) - ld a0, 368(sp) # 8-byte Folded Reload - subw a0, a0, s9 + ld a0, 360(sp) # 8-byte Folded Reload + subw a0, a0, s11 add a0, a0, s1 add s0, a0, s0 mv a0, t0 @@ -5759,16 +5725,16 @@ call free ld a0, 464(sp) # 8-byte Folded Reload call free - mv s9, s1 + mv s11, s1 li t5, 1 li t2, -1 - j .LBB0_236 -.LBB0_678: # %for.end606 + j .LBB0_235 +.LBB0_677: # %for.end606 ld a0, 536(sp) call free ld a0, 512(sp) call free -.LBB0_679: # %cleanup609 +.LBB0_678: # %cleanup609 csrr a0, vlenb li a1, 6 mul a0, a0, a1 --- build.a/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s 2024-04-01 12:41:02.846352880 +0000 +++ build.b/MultiSource/Benchmarks/mafft/CMakeFiles/pairlocalalign.dir/mltaln9.s 2024-04-01 12:41:14.954015225 +0000 @@ -17950,18 +17950,18 @@ li a2, 1 li s10, -1 vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v14, 0 + vmv.v.i v8, 0 + addi a0, sp, 176 + vs2r.v v8, (a0) # Unknown-size Folded Spill li a0, -1025 slli a0, a0, 52 sd a0, 64(sp) # 8-byte Folded Spill vid.v v8 - addi a0, sp, 176 - vs2r.v v8, (a0) # Unknown-size Folded Spill csrr a0, vlenb slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 176 - vs2r.v v14, (a0) # Unknown-size Folded Spill + vs2r.v v8, (a0) # Unknown-size Folded Spill sd s5, 112(sp) # 8-byte Folded Spill j .LBB66_5 .LBB66_4: # %for.cond.loopexit.us @@ -18126,11 +18126,6 @@ srli a2, a1, 30 li a1, 255 call memset - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 176 - vl2r.v v14, (a0) # Unknown-size Folded Reload lw a1, 0(s3) addiw a1, a1, 1 beqz a1, .LBB66_24 @@ -18142,11 +18137,6 @@ srli a2, a1, 30 li a1, 255 call memset - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 176 - vl2r.v v14, (a0) # Unknown-size Folded Reload .LBB66_24: # %while.end58.us.us # in Loop: Header=BB66_9 Depth=3 ld a0, 136(sp) # 8-byte Folded Reload @@ -18212,7 +18202,10 @@ subw a0, a0, a6 add a2, a5, a6 vsetvli a7, zero, e32, m2, ta, ma - addi a7, sp, 176 + csrr a7, vlenb + slli a7, a7, 1 + add a7, sp, a7 + addi a7, a7, 176 vl2r.v v8, (a7) # Unknown-size Folded Reload vadd.vx v8, v8, a5 mv a5, a6 @@ -18261,11 +18254,6 @@ addi a1, a1, %pcrel_lo(.Lpcrel_hi406) ld a2, 64(sp) # 8-byte Folded Reload call fprintf - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 176 - vl2r.v v14, (a0) # Unknown-size Folded Reload lw a0, 52(s1) bltz a0, .LBB66_29 .LBB66_38: # %for.end.us.us @@ -18328,7 +18316,10 @@ subw a0, a0, a6 add a2, a5, a6 vsetvli a7, zero, e32, m2, ta, ma - addi a7, sp, 176 + csrr a7, vlenb + slli a7, a7, 1 + add a7, sp, a7 + addi a7, a7, 176 vl2r.v v8, (a7) # Unknown-size Folded Reload vadd.vx v8, v8, a5 mv a5, a6 @@ -18377,11 +18368,6 @@ addi a1, a1, %pcrel_lo(.Lpcrel_hi408) ld a2, 64(sp) # 8-byte Folded Reload call fprintf - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 176 - vl2r.v v14, (a0) # Unknown-size Folded Reload lw a0, 52(s1) bltz a0, .LBB66_43 .LBB66_52: # %for.end117.us.us @@ -18409,23 +18395,24 @@ slli a3, a3, 2 neg a3, a3 and a3, a3, s3 - vsetvli a4, zero, e32, m2, ta, ma mv a4, a3 mv a5, a2 mv a6, a1 - vmv2r.v v8, v14 + addi a7, sp, 176 + vl2r.v v8, (a7) # Unknown-size Folded Reload .LBB66_56: # %vector.body # Parent Loop BB66_5 Depth=1 # Parent Loop BB66_7 Depth=2 # Parent Loop BB66_9 Depth=3 # => This Inner Loop Header: Depth=4 vl2re32.v v10, (a6) + vsetvli a7, zero, e32, m2, ta, ma vmsne.vi v0, v10, -1 vle32.v v10, (a5), v0.t vmsne.vi v12, v10, -1 vmand.mm v0, v0, v12 - vmerge.vim v10, v14, 1, v0 - vadd.vv v8, v8, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t add a6, a6, s9 sub a4, a4, s7 add a5, a5, s9 @@ -18576,11 +18563,6 @@ fsd fs0, 40(a0) sw s4, 52(a0) sw s0, 48(a0) - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 176 - vl2r.v v14, (a0) # Unknown-size Folded Reload ld s5, 112(sp) # 8-byte Folded Reload li s10, -1 mv s6, s2 --- build.a/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/sqlite3.s 2024-04-01 12:41:02.682357453 +0000 +++ build.b/MultiSource/Applications/sqlite3/CMakeFiles/sqlite3.dir/sqlite3.s 2024-04-01 12:41:14.786019910 +0000 @@ -84012,21 +84012,19 @@ vmv.v.i v8, 0 li a6, 40 mv a7, a2 - vmv.v.i v10, 0 .LBB418_99: # %vector.body # =>This Inner Loop Header: Depth=1 - vlse8.v v12, (a4), a6 vsetvli zero, zero, e8, mf2, ta, ma - vmsne.vi v0, v12, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vlse8.v v10, (a4), a6 + vmsne.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a7, a7, a3 add a4, a4, t0 bnez a7, .LBB418_99 # %bb.100: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a3, v8 beq a2, a0, .LBB418_122 .LBB418_101: # %for.body116.preheader --- build.a/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_autoit.s 2024-04-01 12:41:02.466363477 +0000 +++ build.b/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_autoit.s 2024-04-01 12:41:14.570025934 +0000 @@ -189,7 +189,8 @@ vsetivli zero, 8, e32, m2, ta, ma vmv.v.i v8, 0 csrr a0, vlenb - slli a0, a0, 3 + li a1, 6 + mul a0, a0, a1 add a0, sp, a0 addi a0, a0, 1856 vs2r.v v8, (a0) # Unknown-size Folded Spill @@ -232,9 +233,14 @@ sd a0, 152(sp) # 8-byte Folded Spill srli s5, s3, 1 vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v12, 0 + vmv.v.i v8, 0 + csrr a0, vlenb + slli a0, a0, 2 + add a0, sp, a0 + addi a0, a0, 1856 + vs2r.v v8, (a0) # Unknown-size Folded Spill vsetvli zero, zero, e64, m4, ta, ma - vid.v v16 + vid.v v12 li s4, 2 addi a0, s6, 1474 sd a0, 184(sp) # 8-byte Folded Spill @@ -255,15 +261,11 @@ addi a0, sp, 1856 vs4r.v v8, (a0) # Unknown-size Folded Spill csrr a0, vlenb - slli a0, a0, 3 - add a0, sp, a0 - addi a0, a0, 1856 - vs2r.v v12, (a0) # Unknown-size Folded Spill - csrr a0, vlenb - slli a0, a0, 2 + li a1, 6 + mul a0, a0, a1 add a0, sp, a0 addi a0, a0, 1856 - vs4r.v v16, (a0) # Unknown-size Folded Spill + vs4r.v v12, (a0) # Unknown-size Folded Spill j .LBB0_12 .LBB0_11: # %if.then92.i # in Loop: Header=BB0_12 Depth=1 @@ -339,15 +341,11 @@ mv a1, s6 call LAME_decrypt csrr a0, vlenb - slli a0, a0, 3 + li a1, 6 + mul a0, a0, a1 add a0, sp, a0 addi a0, a0, 1856 - vl2r.v v16, (a0) # Unknown-size Folded Reload - csrr a0, vlenb - slli a0, a0, 2 - add a0, sp, a0 - addi a0, a0, 1856 - vl4r.v v20, (a0) # Unknown-size Folded Reload + vl4r.v v16, (a0) # Unknown-size Folded Reload beqz s6, .LBB0_37 # %bb.20: # %if.end.i.i # in Loop: Header=BB0_12 Depth=1 @@ -411,12 +409,16 @@ add a3, a3, s5 and a3, a3, a4 addi a4, sp, 1248 - vmv2r.v v10, v16 + csrr a5, vlenb + slli a5, a5, 2 + add a5, sp, a5 + addi a5, a5, 1856 + vl2r.v v10, (a5) # Unknown-size Folded Reload .LBB0_31: # %vector.body696 # Parent Loop BB0_12 Depth=1 # => This Inner Loop Header: Depth=2 vsetvli a5, zero, e64, m4, ta, ma - vsaddu.vx v12, v20, a1 + vsaddu.vx v12, v16, a1 vmsltu.vx v8, v12, a2 vmv1r.v v0, v8 vlse8.v v9, (a4), s4, v0.t @@ -428,9 +430,8 @@ vmv2r.v v12, v10 vmseq.vi v9, v9, 0 vmand.mm v0, v0, v9 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v10, v16, 1, v0 - vadd.vv v10, v12, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v10, v10, 1, v0.t add a1, a1, s5 add a4, a4, s3 bne a3, a1, .LBB0_31 @@ -536,15 +537,11 @@ add a0, a1, a0 sb zero, 0(a0) csrr a0, vlenb - slli a0, a0, 3 + li a1, 6 + mul a0, a0, a1 add a0, sp, a0 addi a0, a0, 1856 - vl2r.v v16, (a0) # Unknown-size Folded Reload - csrr a0, vlenb - slli a0, a0, 2 - add a0, sp, a0 - addi a0, a0, 1856 - vl4r.v v20, (a0) # Unknown-size Folded Reload + vl4r.v v16, (a0) # Unknown-size Folded Reload beqz s6, .LBB0_62 # %bb.45: # %if.end.i189.i # in Loop: Header=BB0_12 Depth=1 @@ -604,12 +601,16 @@ add a4, a4, a3 and a4, a4, a5 addi a5, sp, 1248 - vmv2r.v v10, v16 + csrr a6, vlenb + slli a6, a6, 2 + add a6, sp, a6 + addi a6, a6, 1856 + vl2r.v v10, (a6) # Unknown-size Folded Reload .LBB0_56: # %vector.body670 # Parent Loop BB0_12 Depth=1 # => This Inner Loop Header: Depth=2 vsetvli a6, zero, e64, m4, ta, ma - vsaddu.vx v12, v20, a1 + vsaddu.vx v12, v16, a1 vmsltu.vx v8, v12, a2 vmv1r.v v0, v8 vlse8.v v9, (a5), s4, v0.t @@ -621,9 +622,8 @@ vmv2r.v v12, v10 vmseq.vi v9, v9, 0 vmand.mm v0, v0, v9 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v10, v16, 1, v0 - vadd.vv v10, v12, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v10, v10, 1, v0.t add a1, a1, a3 add a5, a5, s3 bne a4, a1, .LBB0_56 @@ -2840,12 +2840,8 @@ addi a4, a4, 1 slli a2, s8, 32 csrr a5, vlenb - slli a5, a5, 3 - add a5, sp, a5 - addi a5, a5, 1856 - vl2r.v v18, (a5) # Unknown-size Folded Reload - csrr a5, vlenb - slli a5, a5, 2 + li a7, 6 + mul a5, a5, a7 add a5, sp, a5 addi a5, a5, 1856 vl4r.v v20, (a5) # Unknown-size Folded Reload @@ -2934,7 +2930,11 @@ add a6, a6, a5 and a6, a6, a7 mv a7, a1 - vmv2r.v v10, v18 + csrr t0, vlenb + slli t0, t0, 2 + add t0, sp, t0 + addi t0, t0, 1856 + vl2r.v v10, (t0) # Unknown-size Folded Reload .LBB0_333: # %vector.body # Parent Loop BB0_12 Depth=1 # Parent Loop BB0_301 Depth=2 @@ -2952,9 +2952,8 @@ vmv2r.v v12, v10 vmseq.vi v9, v9, 0 vmand.mm v0, v0, v9 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v10, v18, 1, v0 - vadd.vv v10, v12, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v10, v10, 1, v0.t add a3, a3, a5 add a7, a7, s3 bne a6, a3, .LBB0_333 @@ -3905,11 +3904,6 @@ j .LBB0_681 .LBB0_450: # %if.end.i390.i # in Loop: Header=BB0_412 Depth=1 - csrr a1, vlenb - slli a1, a1, 3 - add a1, sp, a1 - addi a1, a1, 1856 - vl2r.v v16, (a1) # Unknown-size Folded Reload li a1, 5 bltu a0, a1, .LBB0_455 # %bb.451: # %land.lhs.true.i391.i @@ -5644,8 +5638,13 @@ sub a3, a2, a3 slli a2, a3, 1 mv a4, s9 - vmv2r.v v10, v16 - vmv2r.v v12, v16 + csrr a5, vlenb + li a6, 6 + mul a5, a5, a6 + add a5, sp, a5 + addi a5, a5, 1856 + vl2r.v v12, (a5) # Unknown-size Folded Reload + vmv2r.v v10, v12 .LBB0_670: # %vector.body721 # Parent Loop BB0_412 Depth=1 # => This Inner Loop Header: Depth=2 @@ -5665,12 +5664,10 @@ vmseq.vi v15, v15, 0 vmand.mm v0, v8, v14 vmand.mm v8, v9, v15 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v14, v16, 1, v0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v10, v10, 1, v0.t vmv1r.v v0, v8 - vmerge.vim v8, v16, 1, v0 - vadd.vv v10, v10, v14 - vadd.vv v12, v12, v8 + vadd.vi v12, v12, 1, v0.t addi a3, a3, -16 addi a4, a4, 32 bnez a3, .LBB0_670 --- build.a/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDiscreteDynamicsWorld.s 2024-04-01 12:41:02.742355780 +0000 +++ build.b/MultiSource/Benchmarks/Bullet/CMakeFiles/bullet.dir/btDiscreteDynamicsWorld.s 2024-04-01 12:41:14.854018014 +0000 @@ -11436,29 +11436,28 @@ li s1, 220 li s2, 32 mv s3, t4 - vmv.v.i v9, 0 .LBB61_20: # %vector.body # =>This Inner Loop Header: Depth=1 vl2re64.v v10, (t5) vsetvli zero, zero, e64, m2, ta, ma vluxei64.v v12, (s0), v10 vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v14, (s1), v12 - vmsle.vi v0, v14, -1 + vluxei64.v v9, (s1), v12 + vmsle.vi v0, v9, -1 vsetvli zero, zero, e64, m2, ta, ma vluxei64.v v10, (s2), v10, v0.t vsetvli zero, zero, e32, m1, ta, ma vluxei64.v v12, (s1), v10, v0.t - vmerge.vvm v10, v14, v12, v0 - vmseq.vx v0, v10, a7 - vmerge.vim v10, v8, 1, v0 - vadd.vv v9, v9, v10 + vmerge.vvm v9, v9, v12, v0 + vmseq.vx v0, v9, a7 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub s3, s3, t0 add t5, t5, t6 bnez s3, .LBB61_20 # %bb.21: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v9, v8 + vmv.s.x v9, zero + vredsum.vs v8, v8, v9 vmv.x.s t0, v8 beq t3, t4, .LBB61_15 .LBB61_22: # %for.body18.preheader --- build.a/MultiSource/Benchmarks/nbench/CMakeFiles/nbench.dir/nbench1.s 2024-04-01 12:41:02.974349310 +0000 +++ build.b/MultiSource/Benchmarks/nbench/CMakeFiles/nbench.dir/nbench1.s 2024-04-01 12:41:15.062012213 +0000 @@ -3808,15 +3808,14 @@ # Parent Loop BB10_130 Depth=3 # Parent Loop BB10_142 Depth=4 # => This Inner Loop Header: Depth=5 - vsetvli a7, zero, e16, m2, ta, ma + vsetvli a7, zero, e16, m2, ta, mu vlse16.v v10, (a5), s10 vl2re16.v v12, (a2) vmseq.vi v14, v10, 1 vmsne.vi v10, v12, 1 vmand.mm v0, v14, v10 vse16.v v20, (a2), v0.t - vmerge.vim v10, v18, 1, v0 - vadd.vv v8, v8, v10 + vadd.vi v8, v8, 1, v0.t add a2, a2, s8 sub a6, a6, s5 add a5, a5, s11 --- build.a/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/Archive/7z/7zOut.s 2024-04-01 12:41:01.350394599 +0000 +++ build.b/MultiSource/Benchmarks/7zip/CMakeFiles/7zip-benchmark.dir/CPP/7zip/Archive/7z/7zOut.s 2024-04-01 12:41:13.470056608 +0000 @@ -5938,35 +5938,33 @@ and a2, a2, a0 slli s6, s6, 1 vsetvli a4, zero, e32, m1, ta, ma - vmv.v.i v11, 0 - vsetvli zero, zero, e64, m2, ta, ma vmv.v.i v8, 0 + vsetvli zero, zero, e64, m2, ta, ma + vmv.v.i v10, 0 li a4, 24 mv a5, a2 mv a6, a1 - vmv1r.v v10, v11 .LBB23_143: # %vector.body753 # =>This Inner Loop Header: Depth=1 vl2re64.v v12, (a6) - vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v14, (a4), v12 - vmsne.vi v0, v14, 0 - vmerge.vim v12, v11, 1, v0 - vadd.vv v10, v10, v12 - vadd.vv v12, v14, v14 - vadd.vi v12, v12, 2 - vwadd.wv v8, v8, v12 + vsetvli zero, zero, e32, m1, ta, mu + vluxei64.v v9, (a4), v12 + vmsne.vi v0, v9, 0 + vadd.vi v8, v8, 1, v0.t + vadd.vv v9, v9, v9 + vadd.vi v9, v9, 2 + vwadd.wv v10, v10, v9 sub a5, a5, a3 add a6, a6, s6 bnez a5, .LBB23_143 # %bb.144: # %middle.block745 vsetvli zero, zero, e64, m2, ta, ma - vmv.s.x v11, zero - vredsum.vs v8, v8, v11 - vmv.x.s s3, v8 - vmv.s.x v8, zero + vmv.s.x v9, zero + vredsum.vs v9, v10, v9 + vmv.x.s s3, v9 + vmv.s.x v9, zero vsetvli zero, zero, e32, m1, ta, ma - vredsum.vs v8, v10, v8 + vredsum.vs v8, v8, v9 vmv.x.s a3, v8 beq a2, a0, .LBB23_147 .LBB23_145: # %for.body160.preheader --- build.a/MultiSource/Applications/lua/CMakeFiles/lua.dir/ltable.s 2024-04-01 12:41:02.590360018 +0000 +++ build.b/MultiSource/Applications/lua/CMakeFiles/lua.dir/ltable.s 2024-04-01 12:41:14.694022476 +0000 @@ -1351,7 +1351,7 @@ slli a3, a3, 4 add a3, a5, a3 addi a3, a3, -8 - vsetvli t0, zero, e32, m2, ta, ma + vsetvli t0, zero, e32, m2, ta, mu vmv2r.v v8, v12 .LBB11_17: # %vector.body # Parent Loop BB11_3 Depth=1 @@ -1359,8 +1359,7 @@ # => This Inner Loop Header: Depth=3 vlse32.v v10, (a3), s11 vmsne.vi v0, v10, 0 - vmerge.vim v10, v12, 1, v0 - vadd.vv v8, v8, v10 + vadd.vi v8, v8, 1, v0.t sub a6, a6, s5 add a3, a3, s6 bnez a6, .LBB11_17 --- build.a/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_readdb.s 2024-04-01 12:41:02.482363030 +0000 +++ build.b/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_readdb.s 2024-04-01 12:41:14.586025488 +0000 @@ -305,21 +305,19 @@ li a3, 42 mv a4, a1 mv a5, s6 - vmv.v.i v10, 0 .LBB0_42: # %vector.body # =>This Inner Loop Header: Depth=1 - vle8.v v12, (a5) vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vx v0, v12, a3 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vle8.v v10, (a5) + vmseq.vx v0, v10, a3 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a4, a4, a2 add a5, a5, a2 bnez a4, .LBB0_42 # %bb.43: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s1, v8 beq a0, a1, .LBB0_46 .LBB0_44: # %for.body122.preheader274 --- build.a/MultiSource/Applications/lemon/CMakeFiles/lemon.dir/lemon.s 2024-04-01 12:41:02.586360130 +0000 +++ build.b/MultiSource/Applications/lemon/CMakeFiles/lemon.dir/lemon.s 2024-04-01 12:41:14.686022699 +0000 @@ -468,6 +468,7 @@ # Parent Loop BB4_11 Depth=1 # => This Inner Loop Header: Depth=2 addi s1, t6, 64 + vsetvli zero, zero, e32, m2, ta, mu vlseg2e32.v v20, (t6) vlseg2e32.v v22, (s1) vmsgt.vi v8, v20, -1 @@ -478,11 +479,9 @@ vmseq.vv v20, v22, v26 vmand.mm v0, v8, v28 vmand.mm v8, v9, v20 - vmerge.vim v20, v12, 1, v0 + vadd.vi v14, v14, 1, v0.t vmv1r.v v0, v8 - vmerge.vim v8, v12, 1, v0 - vadd.vv v14, v14, v20 - vadd.vv v16, v16, v8 + vadd.vi v16, v16, 1, v0.t vadd.vx v18, v18, a5 addi t5, t5, -16 addi t6, t6, 128 --- build.a/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_regex_regcomp.s 2024-04-01 12:41:02.482363030 +0000 +++ build.b/MultiSource/Applications/ClamAV/CMakeFiles/clamscan.dir/libclamav_regex_regcomp.s 2024-04-01 12:41:14.586025488 +0000 @@ -5191,22 +5191,20 @@ li a7, 508 mul a4, a4, a7 and a4, a4, a1 - vsetvli a7, zero, e32, m2, ta, ma - vmv.v.i v10, 0 srli a6, a6, 1 + vsetvli a7, zero, e32, m2, ta, ma vmv.v.i v8, 0 .LBB4_213: # %vector.body400 # =>This Inner Loop Header: Depth=1 andi a7, a5, 252 add a7, a2, a7 - vle8.v v12, (a7) vsetvli zero, zero, e8, mf2, ta, ma - vand.vx v12, v12, a3 - vmsne.vi v0, v12, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 + vle8.v v10, (a7) + vand.vx v10, v10, a3 + vmsne.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu add a5, a5, a6 - vadd.vv v8, v8, v12 + vadd.vi v8, v8, 1, v0.t bne a4, a5, .LBB4_213 # %bb.214: # %middle.block392 vmv.s.x v10, zero --- build.a/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s 2024-04-01 12:41:02.622359126 +0000 +++ build.b/MultiSource/Applications/oggenc/CMakeFiles/oggenc.dir/oggenc.s 2024-04-01 12:41:14.726021584 +0000 @@ -36657,26 +36657,24 @@ .LBB196_4: # %vector.ph neg a2, a3 and a2, a0, a2 - vsetvli a4, zero, e32, m1, ta, ma - vmv.v.i v8, 0 slli a4, s8, 1 + vsetvli a5, zero, e32, m1, ta, ma + vmv.v.i v8, 0 mv a5, a2 mv a6, a1 - vmv.v.i v9, 0 .LBB196_5: # %vector.body # =>This Inner Loop Header: Depth=1 vl2re64.v v10, (a6) vsetvli zero, zero, e64, m2, ta, ma vmsgt.vi v0, v10, 0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v10, v8, 1, v0 - vadd.vv v9, v9, v10 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, a3 add a6, a6, a4 bnez a5, .LBB196_5 # %bb.6: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v9, v8 + vmv.s.x v9, zero + vredsum.vs v8, v8, v9 vmv.x.s s3, v8 beq a0, a2, .LBB196_9 .LBB196_7: # %for.body.preheader @@ -62314,24 +62312,23 @@ slli a0, a0, 31 sub a0, a0, a4 and a0, a0, s1 + slli a2, a2, 1 vsetvli a4, zero, e32, m2, ta, ma vmv.v.i v8, 0 - slli a2, a2, 1 mv a4, a0 mv a5, a3 - vmv.v.i v10, 0 .LBB275_4: # %vector.body # =>This Inner Loop Header: Depth=1 - vl2re32.v v12, (a5) - vmsne.vi v0, v12, 0 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vl2re32.v v10, (a5) + vsetvli zero, zero, e32, m2, ta, mu + vmsne.vi v0, v10, 0 + vadd.vi v8, v8, 1, v0.t sub a4, a4, a6 add a5, a5, a2 bnez a4, .LBB275_4 # %bb.5: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a4, v8 beq a0, s1, .LBB275_8 .LBB275_6: # %for.body.preheader29 @@ -67783,29 +67780,27 @@ li a0, 0 ret .LBB312_4: # %vector.ph + addi a4, a1, 27 srli a3, a3, 3 - li a4, 252 - mul a3, a3, a4 + li a5, 252 + mul a3, a3, a5 and a3, a3, a2 - vsetvli a4, zero, e32, m2, ta, ma + vsetvli a5, zero, e32, m2, ta, ma vmv.v.i v8, 0 - addi a4, a1, 27 mv a5, a3 - vmv.v.i v10, 0 .LBB312_5: # %vector.body # =>This Inner Loop Header: Depth=1 - vle8.v v12, (a4) vsetvli zero, zero, e8, mf2, ta, ma - vmsne.vi v0, v12, -1 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vle8.v v10, (a4) + vmsne.vi v0, v10, -1 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, a0 add a4, a4, a0 bnez a5, .LBB312_5 # %bb.6: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a0, v8 beq a3, a2, .LBB312_9 .LBB312_7: # %for.body.preheader9 --- build.a/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makesite.s 2024-04-01 12:41:03.014348194 +0000 +++ build.b/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/makesite.s 2024-04-01 12:41:15.106010987 +0000 @@ -309,49 +309,46 @@ vmv.v.x v14, s1 addi t0, a3, 36 li t2, 10 - mul t1, t1, t2 .Lpcrel_hi9: - auipc t2, %pcrel_hi(.LCPI0_2) - fld fa4, %pcrel_lo(.Lpcrel_hi9)(t2) + auipc t3, %pcrel_hi(.LCPI0_2) + fld fa4, %pcrel_lo(.Lpcrel_hi9)(t3) + mul t1, t1, t2 li t2, 20 vmv.v.i v16, 1 - vmv.v.i v18, 0 mv t3, a7 .LBB0_30: # %vector.body164 # =>This Inner Loop Header: Depth=1 addi t4, t0, -4 vsse32.v v14, (t4), t2 - vnsrl.wi v20, v8, 0 - vadd.vi v20, v20, 1 - vfwcvt.f.x.v v24, v20 + vnsrl.wi v18, v8, 0 + vadd.vi v18, v18, 1 + vfwcvt.f.x.v v20, v18 vsetvli zero, zero, e64, m4, ta, ma - vfmul.vf v20, v24, fa5 + vfmul.vf v20, v20, fa5 vsetvli zero, zero, e32, m2, ta, ma vfwcvt.f.x.v v24, v12 vsetvli zero, zero, e64, m4, ta, ma vfmul.vf v24, v24, fa5 vsetvli zero, zero, e32, m2, ta, ma - vfncvt.rtz.x.f.w v28, v24 - vfwcvt.f.x.v v4, v28 + vfncvt.rtz.x.f.w v18, v24 + vfwcvt.f.x.v v28, v18 vsetvli zero, zero, e64, m4, ta, ma - vfsub.vv v4, v24, v4 - vmfge.vf v0, v4, fa4 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v30, v18, 1, v0 - vadd.vv v28, v30, v28 - vadd.vx v30, v28, s0 - vsse32.v v30, (t0), t2 - vfncvt.rtz.x.f.w v30, v20 - vfwcvt.f.x.v v4, v30 + vfsub.vv v28, v24, v28 + vmfge.vf v0, v28, fa4 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v18, v18, 1, v0.t + vadd.vx v28, v18, s0 + vsse32.v v28, (t0), t2 + vfncvt.rtz.x.f.w v28, v20 + vfwcvt.f.x.v v4, v28 vsetvli zero, zero, e64, m4, ta, ma vfsub.vv v4, v20, v4 vmfge.vf v0, v4, fa4 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v6, v18, 1, v0 - vadd.vv v30, v6, v30 - vsub.vv v28, v30, v28 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v28, v28, 1, v0.t + vsub.vv v18, v28, v18 addi t4, t0, -8 - vsse32.v v28, (t4), t2 + vsse32.v v18, (t4), t2 addi t4, t0, -12 vsse32.v v16, (t4), t2 vsetvli zero, zero, e64, m4, ta, ma @@ -403,47 +400,44 @@ vmv.v.x v14, s1 addi t1, a3, 36 li t3, 10 - mul t2, t2, t3 fld fa4, %pcrel_lo(.Lpcrel_hi7)(a5) + mul t2, t2, t3 li t3, 20 vmv.v.i v16, 1 - vmv.v.i v18, 0 mv t4, t0 .LBB0_36: # %vector.body138 # =>This Inner Loop Header: Depth=1 addi t5, t1, -4 vsse32.v v14, (t5), t3 - vnsrl.wi v20, v8, 0 - vadd.vi v20, v20, 1 - vfwcvt.f.x.v v24, v20 + vnsrl.wi v18, v8, 0 + vadd.vi v18, v18, 1 + vfwcvt.f.x.v v20, v18 vsetvli zero, zero, e64, m4, ta, ma - vfmul.vf v20, v24, fa5 + vfmul.vf v20, v20, fa5 vsetvli zero, zero, e32, m2, ta, ma vfwcvt.f.x.v v24, v12 vsetvli zero, zero, e64, m4, ta, ma vfmul.vf v24, v24, fa5 vsetvli zero, zero, e32, m2, ta, ma - vfncvt.rtz.x.f.w v28, v24 - vfwcvt.f.x.v v4, v28 + vfncvt.rtz.x.f.w v18, v24 + vfwcvt.f.x.v v28, v18 vsetvli zero, zero, e64, m4, ta, ma - vfsub.vv v4, v24, v4 - vmfge.vf v0, v4, fa4 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v30, v18, 1, v0 - vadd.vv v28, v30, v28 - vrsub.vx v30, v28, s0 - vsse32.v v30, (t1), t3 - vfncvt.rtz.x.f.w v30, v20 - vfwcvt.f.x.v v4, v30 + vfsub.vv v28, v24, v28 + vmfge.vf v0, v28, fa4 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v18, v18, 1, v0.t + vrsub.vx v28, v18, s0 + vsse32.v v28, (t1), t3 + vfncvt.rtz.x.f.w v28, v20 + vfwcvt.f.x.v v4, v28 vsetvli zero, zero, e64, m4, ta, ma vfsub.vv v4, v20, v4 vmfge.vf v0, v4, fa4 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v6, v18, 1, v0 - vadd.vv v30, v6, v30 - vsub.vv v28, v30, v28 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v28, v28, 1, v0.t + vsub.vv v18, v28, v18 addi t5, t1, -8 - vsse32.v v28, (t5), t3 + vsse32.v v18, (t5), t3 addi t5, t1, -12 vsse32.v v16, (t5), t3 vsetvli zero, zero, e64, m4, ta, ma @@ -910,9 +904,8 @@ vsetvli zero, zero, e64, m4, ta, ma vfsub.vv v28, v24, v28 vmfge.vf v0, v28, fa4 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v28, v16, 1, v0 - vadd.vv v18, v28, v18 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v18, v18, 1, v0.t vadd.vx v28, v18, s1 addi t3, a7, -4 vsse32.v v28, (t3), t1 @@ -921,9 +914,8 @@ vsetvli zero, zero, e64, m4, ta, ma vfsub.vv v4, v20, v4 vmfge.vf v0, v4, fa4 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v30, v16, 1, v0 - vadd.vv v28, v30, v28 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v28, v28, 1, v0.t vsub.vv v18, v28, v18 addi t3, a7, -8 vsse32.v v18, (t3), t1 @@ -1000,9 +992,8 @@ vsetvli zero, zero, e64, m4, ta, ma vfsub.vv v28, v24, v28 vmfge.vf v0, v28, fa4 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v28, v16, 1, v0 - vadd.vv v18, v28, v18 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v18, v18, 1, v0.t vrsub.vx v28, v18, s1 addi t4, t0, -4 vsse32.v v28, (t4), t2 @@ -1011,9 +1002,8 @@ vsetvli zero, zero, e64, m4, ta, ma vfsub.vv v4, v20, v4 vmfge.vf v0, v4, fa4 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v30, v16, 1, v0 - vadd.vv v28, v30, v28 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v28, v28, 1, v0.t vsub.vv v18, v28, v18 addi t4, t0, -8 vsse32.v v18, (t4), t2 --- build.a/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/state.s 2024-04-01 12:41:02.798354218 +0000 +++ build.b/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/state.s 2024-04-01 12:41:14.906016564 +0000 @@ -1250,39 +1250,36 @@ srli a6, s11, 1 neg a5, a6 and a5, a0, a5 - vsetvli a7, zero, e32, m2, ta, ma - vmv.v.i v8, 0 slli a7, s11, 1 + vsetvli t0, zero, e32, m2, ta, ma + vmv.v.i v10, 0 mv t0, a5 mv t1, s8 mv t2, s7 mv t3, s9 - vmv.v.i v10, 0 .LBB6_87: # %vector.body # =>This Inner Loop Header: Depth=1 - vl2re32.v v12, (t2) + vl2re32.v v8, (t2) vl2re32.v v16, (t3) vsetvli zero, zero, e64, m4, ta, ma - vsext.vf2 v20, v12 - vsll.vi v12, v20, 2 - vsetvli zero, zero, e32, m2, ta, ma - vluxei64.v v18, (a1), v12 - vluxei64.v v20, (a2), v12 - vmseq.vv v0, v16, v18 - vmerge.vim v18, v8, 1, v0 - vmseq.vv v0, v16, v20 + vsext.vf2 v12, v8 + vsll.vi v12, v12, 2 + vsetvli zero, zero, e32, m2, ta, mu + vluxei64.v v8, (a1), v12 + vluxei64.v v18, (a2), v12 + vmseq.vv v0, v16, v8 + vmseq.vv v8, v16, v18 vl2re32.v v16, (t1) - vluxei64.v v20, (a3), v12 - vadd.vv v10, v10, v18 - vluxei64.v v18, (a4), v12 - vmerge.vim v12, v8, 1, v0 - vmseq.vv v0, v16, v20 - vadd.vv v10, v10, v12 - vmerge.vim v12, v8, 1, v0 + vluxei64.v v18, (a3), v12 + vluxei64.v v20, (a4), v12 + vadd.vi v10, v10, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t vmseq.vv v0, v16, v18 - vadd.vv v10, v10, v12 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vmseq.vv v8, v16, v20 + vadd.vi v10, v10, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t add t3, t3, a7 add t2, t2, a7 sub t0, t0, a6 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/space_node/node_templates.s 2024-04-01 12:40:59.010459856 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/space_node/node_templates.s 2024-04-01 12:41:11.014125098 +0000 @@ -1277,8 +1277,6 @@ # in Loop: Header=BB7_7 Depth=1 li t2, 0 addi a2, a0, 16 - addi a3, sp, 256 - vl2r.v v12, (a3) # Unknown-size Folded Reload li t1, 32 j .LBB7_19 .LBB7_17: # %for.cond8.loopexit.loopexit.i @@ -1358,8 +1356,6 @@ addi a2, a2, 88 mv a4, a3 addi a3, a3, 8 - addi a5, sp, 256 - vl2r.v v12, (a5) # Unknown-size Folded Reload .LBB7_28: # %for.body71.i # Parent Loop BB7_7 Depth=1 # => This Inner Loop Header: Depth=2 @@ -1380,8 +1376,6 @@ bltu s3, s9, .LBB7_32 j .LBB7_38 .LBB7_30: # in Loop: Header=BB7_7 Depth=1 - addi a1, sp, 256 - vl2r.v v12, (a1) # Unknown-size Folded Reload li t1, 32 .LBB7_31: # %for.body25.lr.ph # in Loop: Header=BB7_7 Depth=1 @@ -1401,17 +1395,17 @@ .LBB7_34: # %vector.ph # in Loop: Header=BB7_7 Depth=1 sub a2, s9, a2 - vsetvli a3, zero, e32, m2, ta, ma + vsetvli a3, zero, e32, m2, ta, mu mv a3, a1 mv a4, a2 - vmv2r.v v8, v12 + addi a6, sp, 256 + vl2r.v v8, (a6) # Unknown-size Folded Reload .LBB7_35: # %vector.body # Parent Loop BB7_7 Depth=1 # => This Inner Loop Header: Depth=2 vlse32.v v10, (a3), t1 vmseq.vx v0, v10, a5 - vmerge.vim v10, v12, 1, v0 - vadd.vv v8, v8, v10 + vadd.vi v8, v8, 1, v0.t sub a4, a4, s3 add a3, a3, s6 bnez a4, .LBB7_35 @@ -1422,8 +1416,6 @@ vmv.x.s s2, v8 j .LBB7_39 .LBB7_37: # in Loop: Header=BB7_7 Depth=1 - addi a0, sp, 256 - vl2r.v v12, (a0) # Unknown-size Folded Reload mv a0, a3 lh a5, 168(s11) addi a1, a3, 4 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-ssa-loop-prefetch.s 2024-04-01 12:41:00.542417132 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-ssa-loop-prefetch.s 2024-04-01 12:41:12.606080703 +0000 @@ -302,13 +302,14 @@ beqz a3, .LBB0_27 # %bb.29: # %for.body.lr.ph sd a3, 240(sp) # 8-byte Folded Spill - sd zero, 160(sp) # 8-byte Folded Spill + li s5, 0 li a0, -1 srli s8, a0, 32 csrr s11, vlenb srli s0, s11, 1 - slli s5, s11, 1 - srli s7, s11, 2 + slli s7, s11, 1 + srli a0, s11, 2 + sd a0, 64(sp) # 8-byte Folded Spill addi s9, s2, 8 lui a0, 16 addiw a0, a0, -1 @@ -316,7 +317,7 @@ li s6, 3 lui a0, 1 addiw a0, a0, -1140 - sd a0, 120(sp) # 8-byte Folded Spill + sd a0, 128(sp) # 8-byte Folded Spill vsetvli a0, zero, e8, mf4, ta, ma vmv.v.i v8, 1 addi a0, sp, 368 @@ -342,7 +343,7 @@ add a0, sp, a0 addi a0, a0, 368 vs2r.v v8, (a0) # Unknown-size Folded Spill - sd s3, 152(sp) # 8-byte Folded Spill + sd s3, 160(sp) # 8-byte Folded Spill sd s8, 216(sp) # 8-byte Folded Spill ld a0, 0(s3) beqz a0, .LBB0_32 @@ -362,18 +363,18 @@ call fprintf .LBB0_32: # %if.end56 # =>This Loop Header: Depth=1 - # Child Loop BB0_41 Depth 2 - # Child Loop BB0_49 Depth 3 - # Child Loop BB0_71 Depth 2 - # Child Loop BB0_73 Depth 3 - # Child Loop BB0_82 Depth 4 - # Child Loop BB0_92 Depth 5 - # Child Loop BB0_100 Depth 5 - # Child Loop BB0_103 Depth 6 - # Child Loop BB0_106 Depth 6 - # Child Loop BB0_131 Depth 5 - # Child Loop BB0_134 Depth 6 - # Child Loop BB0_137 Depth 6 + # Child Loop BB0_40 Depth 2 + # Child Loop BB0_48 Depth 3 + # Child Loop BB0_70 Depth 2 + # Child Loop BB0_72 Depth 3 + # Child Loop BB0_81 Depth 4 + # Child Loop BB0_91 Depth 5 + # Child Loop BB0_99 Depth 5 + # Child Loop BB0_102 Depth 6 + # Child Loop BB0_105 Depth 6 + # Child Loop BB0_130 Depth 5 + # Child Loop BB0_133 Depth 6 + # Child Loop BB0_136 Depth 6 # Child Loop BB0_159 Depth 2 # Child Loop BB0_161 Depth 3 # Child Loop BB0_191 Depth 2 @@ -410,7 +411,7 @@ ld s1, 240(sp) # 8-byte Folded Reload mv a0, s1 call optimize_loop_nest_for_size_p - beqz a0, .LBB0_37 + beqz a0, .LBB0_36 # %bb.33: # %if.then.i # in Loop: Header=BB0_32 Depth=1 ld a3, 0(s3) @@ -423,83 +424,74 @@ ld a0, %pcrel_lo(.Lpcrel_hi22)(a0) lbu a0, 0(a0) andi a0, a0, 8 - beqz a0, .LBB0_36 -# %bb.35: # %if.then3.i - # in Loop: Header=BB0_32 Depth=1 -.Lpcrel_hi23: - auipc a0, %pcrel_hi(.L.str.15) - addi a0, a0, %pcrel_lo(.Lpcrel_hi23) - li a1, 22 - li a2, 1 - call fwrite -.LBB0_36: # in Loop: Header=BB0_32 Depth=1 + bnez a0, .LBB0_156 +# %bb.35: # in Loop: Header=BB0_32 Depth=1 li s1, 0 - ld a0, 160(sp) # 8-byte Folded Reload j .LBB0_381 -.LBB0_37: # %if.end5.i +.LBB0_36: # %if.end5.i # in Loop: Header=BB0_32 Depth=1 - sd s4, 136(sp) # 8-byte Folded Spill + sd s4, 144(sp) # 8-byte Folded Spill mv a0, s1 call get_loop_body_in_dom_order lwu a1, 36(s1) mv s4, a0 sd zero, 336(sp) beqz a1, .LBB0_177 -# %bb.38: # %for.body.i.i.preheader +# %bb.37: # %for.body.i.i.preheader # in Loop: Header=BB0_32 Depth=1 - sd s7, 64(sp) # 8-byte Folded Spill - sd s5, 56(sp) # 8-byte Folded Spill + sd s7, 56(sp) # 8-byte Folded Spill + sd s5, 72(sp) # 8-byte Folded Spill sd zero, 184(sp) # 8-byte Folded Spill li s7, 0 li s5, 1 ld a3, 232(sp) # 8-byte Folded Reload ld s1, 240(sp) # 8-byte Folded Reload - sd s2, 96(sp) # 8-byte Folded Spill - sd s9, 80(sp) # 8-byte Folded Spill - j .LBB0_41 -.LBB0_39: # %for.inc46.loopexit.i.i - # in Loop: Header=BB0_41 Depth=2 + sd s2, 104(sp) # 8-byte Folded Spill + sd s9, 88(sp) # 8-byte Folded Spill + j .LBB0_40 +.LBB0_38: # %for.inc46.loopexit.i.i + # in Loop: Header=BB0_40 Depth=2 lwu a1, 36(s1) - ld s3, 152(sp) # 8-byte Folded Reload - ld s2, 96(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload + ld s2, 104(sp) # 8-byte Folded Reload ld s8, 216(sp) # 8-byte Folded Reload - ld s9, 80(sp) # 8-byte Folded Reload -.LBB0_40: # %for.inc46.i.i - # in Loop: Header=BB0_41 Depth=2 + ld s9, 88(sp) # 8-byte Folded Reload +.LBB0_39: # %for.inc46.i.i + # in Loop: Header=BB0_40 Depth=2 addi s7, s7, 1 - bgeu s7, a1, .LBB0_68 -.LBB0_41: # %for.body.i.i + bgeu s7, a1, .LBB0_67 +.LBB0_40: # %for.body.i.i # Parent Loop BB0_32 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_49 Depth 3 + # Child Loop BB0_48 Depth 3 slli a0, s7, 3 add a0, s4, a0 ld a0, 0(a0) ld a2, 24(a0) - bne a2, s1, .LBB0_40 -# %bb.42: # %if.end.i.i26 - # in Loop: Header=BB0_41 Depth=2 + bne a2, s1, .LBB0_39 +# %bb.41: # %if.end.i.i26 + # in Loop: Header=BB0_40 Depth=2 lbu a2, 97(a0) andi a2, a2, 2 - bnez a2, .LBB0_40 -# %bb.43: # %land.lhs.true.i.i.i.i - # in Loop: Header=BB0_41 Depth=2 + bnez a2, .LBB0_39 +# %bb.42: # %land.lhs.true.i.i.i.i + # in Loop: Header=BB0_40 Depth=2 ld a0, 64(a0) - beqz a0, .LBB0_40 -# %bb.44: # %bb_seq.exit.i.i.i - # in Loop: Header=BB0_41 Depth=2 + beqz a0, .LBB0_39 +# %bb.43: # %bb_seq.exit.i.i.i + # in Loop: Header=BB0_40 Depth=2 ld a0, 0(a0) - beqz a0, .LBB0_40 -# %bb.45: # %gsi_start_bb.exit.i.i - # in Loop: Header=BB0_41 Depth=2 + beqz a0, .LBB0_39 +# %bb.44: # %gsi_start_bb.exit.i.i + # in Loop: Header=BB0_40 Depth=2 ld s3, 0(a0) - beqz s3, .LBB0_67 -# %bb.46: # %for.body4.i.i.preheader - # in Loop: Header=BB0_41 Depth=2 + beqz s3, .LBB0_66 +# %bb.45: # %for.body4.i.i.preheader + # in Loop: Header=BB0_40 Depth=2 li s8, 6 - j .LBB0_49 -.LBB0_47: # %if.then38.i.i - # in Loop: Header=BB0_49 Depth=3 + j .LBB0_48 +.LBB0_46: # %if.then38.i.i + # in Loop: Header=BB0_48 Depth=3 addi a1, sp, 336 li a3, 1 ld a0, 240(sp) # 8-byte Folded Reload @@ -512,19 +504,19 @@ ld a0, 184(sp) # 8-byte Folded Reload addiw a0, a0, 1 sd a0, 184(sp) # 8-byte Folded Spill -.LBB0_48: # %for.inc.i.i - # in Loop: Header=BB0_49 Depth=3 +.LBB0_47: # %for.inc.i.i + # in Loop: Header=BB0_48 Depth=3 ld s3, 16(s3) - beqz s3, .LBB0_39 -.LBB0_49: # %for.body4.i.i + beqz s3, .LBB0_38 +.LBB0_48: # %for.body4.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_41 Depth=2 + # Parent Loop BB0_40 Depth=2 # => This Inner Loop Header: Depth=3 ld s2, 0(s3) lbu a0, 0(s2) - bne a0, s8, .LBB0_52 -# %bb.50: # %if.then.i.i.i.i - # in Loop: Header=BB0_49 Depth=3 + bne a0, s8, .LBB0_51 +# %bb.49: # %if.then.i.i.i.i + # in Loop: Header=BB0_48 Depth=3 mv s9, s5 .Lpcrel_hi24: auipc a0, %got_pcrel_hi(gss_for_code_) @@ -536,39 +528,39 @@ slli a0, a0, 3 add a0, s8, a0 ld a0, 0(a0) - beqz a0, .LBB0_57 -# %bb.51: # %if.then.i.i31.i.thread.i - # in Loop: Header=BB0_49 Depth=3 + beqz a0, .LBB0_56 +# %bb.50: # %if.then.i.i31.i.thread.i + # in Loop: Header=BB0_48 Depth=3 add a1, s2, a0 ld s5, 0(a1) - j .LBB0_62 -.LBB0_52: # %if.then9.i.i - # in Loop: Header=BB0_49 Depth=3 + j .LBB0_61 +.LBB0_51: # %if.then9.i.i + # in Loop: Header=BB0_48 Depth=3 andi a1, a0, 254 addi a1, a1, -10 li a2, -4 - bltu a1, a2, .LBB0_54 -# %bb.53: # %gimple_vuse.exit.i.i - # in Loop: Header=BB0_49 Depth=3 + bltu a1, a2, .LBB0_53 +# %bb.52: # %gimple_vuse.exit.i.i + # in Loop: Header=BB0_48 Depth=3 ld a1, 56(s2) - bnez a1, .LBB0_56 -.LBB0_54: # %lor.lhs.false.i.i - # in Loop: Header=BB0_49 Depth=3 + bnez a1, .LBB0_55 +.LBB0_53: # %lor.lhs.false.i.i + # in Loop: Header=BB0_48 Depth=3 li a1, 8 - bne a0, a1, .LBB0_48 -# %bb.55: # %land.lhs.true.i.i - # in Loop: Header=BB0_49 Depth=3 + bne a0, a1, .LBB0_47 +# %bb.54: # %land.lhs.true.i.i + # in Loop: Header=BB0_48 Depth=3 mv a0, s2 call gimple_call_flags ld a3, 232(sp) # 8-byte Folded Reload andi a0, a0, 1 - bnez a0, .LBB0_48 -.LBB0_56: # %if.then16.i.i - # in Loop: Header=BB0_49 Depth=3 + bnez a0, .LBB0_47 +.LBB0_55: # %if.then16.i.i + # in Loop: Header=BB0_48 Depth=3 li s5, 0 - j .LBB0_48 -.LBB0_57: # %gimple_assign_lhs.exit.i.i - # in Loop: Header=BB0_49 Depth=3 + j .LBB0_47 +.LBB0_56: # %gimple_assign_lhs.exit.i.i + # in Loop: Header=BB0_48 Depth=3 .Lpcrel_hi26: auipc a0, %pcrel_hi(.L.str.17) addi a0, a0, %pcrel_lo(.Lpcrel_hi26) @@ -581,23 +573,23 @@ ld s5, 0(s2) addi a1, a0, -10 li a2, -9 - bgeu a1, a2, .LBB0_59 -# %bb.58: # in Loop: Header=BB0_49 Depth=3 + bgeu a1, a2, .LBB0_58 +# %bb.57: # in Loop: Header=BB0_48 Depth=3 li a2, 0 ld a3, 232(sp) # 8-byte Folded Reload li s8, 6 - j .LBB0_63 -.LBB0_59: # %if.then.i.i31.i.i - # in Loop: Header=BB0_49 Depth=3 + j .LBB0_62 +.LBB0_58: # %if.then.i.i31.i.i + # in Loop: Header=BB0_48 Depth=3 slli a0, a0, 2 add a0, s1, a0 lwu a0, 0(a0) slli a0, a0, 3 add a0, s8, a0 ld a0, 0(a0) - bnez a0, .LBB0_61 -# %bb.60: # %cond.true.i.i.i40.i.i - # in Loop: Header=BB0_49 Depth=3 + bnez a0, .LBB0_60 +# %bb.59: # %cond.true.i.i.i40.i.i + # in Loop: Header=BB0_48 Depth=3 .Lpcrel_hi28: auipc a0, %pcrel_hi(.L.str.17) addi a0, a0, %pcrel_lo(.Lpcrel_hi28) @@ -607,16 +599,16 @@ li a1, 1622 call fancy_abort li a0, 0 -.LBB0_61: # %gimple_ops.exit.i.i37.i.i - # in Loop: Header=BB0_49 Depth=3 +.LBB0_60: # %gimple_ops.exit.i.i37.i.i + # in Loop: Header=BB0_48 Depth=3 ld a3, 232(sp) # 8-byte Folded Reload -.LBB0_62: # %gimple_ops.exit.i.i37.i.i - # in Loop: Header=BB0_49 Depth=3 +.LBB0_61: # %gimple_ops.exit.i.i37.i.i + # in Loop: Header=BB0_48 Depth=3 li s8, 6 add a0, s2, a0 ld a2, 8(a0) -.LBB0_63: # %gimple_assign_rhs1.exit.i.i - # in Loop: Header=BB0_49 Depth=3 +.LBB0_62: # %gimple_assign_rhs1.exit.i.i + # in Loop: Header=BB0_48 Depth=3 lwu a0, 0(a2) .Lpcrel_hi30: auipc a1, %got_pcrel_hi(tree_code_type) @@ -626,9 +618,9 @@ add a0, s1, a0 lw a0, 0(a0) li a1, 4 - bne a0, a1, .LBB0_65 -# %bb.64: # %if.then25.i.i - # in Loop: Header=BB0_49 Depth=3 + bne a0, a1, .LBB0_64 +# %bb.63: # %if.then25.i.i + # in Loop: Header=BB0_48 Depth=3 addi a1, sp, 336 ld a0, 240(sp) # 8-byte Folded Reload li a3, 0 @@ -639,33 +631,32 @@ ld a0, 184(sp) # 8-byte Folded Reload addiw a0, a0, 1 sd a0, 184(sp) # 8-byte Folded Spill -.LBB0_65: # %if.end31.i.i - # in Loop: Header=BB0_49 Depth=3 +.LBB0_64: # %if.end31.i.i + # in Loop: Header=BB0_48 Depth=3 lwu a0, 0(s5) and a0, a0, a3 slli a0, a0, 2 add a0, s1, a0 lw a0, 0(a0) li a1, 4 - beq a0, a1, .LBB0_47 -# %bb.66: # in Loop: Header=BB0_49 Depth=3 + beq a0, a1, .LBB0_46 +# %bb.65: # in Loop: Header=BB0_48 Depth=3 ld s1, 240(sp) # 8-byte Folded Reload mv s5, s9 - j .LBB0_48 -.LBB0_67: # in Loop: Header=BB0_41 Depth=2 - ld s3, 152(sp) # 8-byte Folded Reload - j .LBB0_40 -.LBB0_68: # %gather_memory_references.exit.i + j .LBB0_47 +.LBB0_66: # in Loop: Header=BB0_40 Depth=2 + ld s3, 160(sp) # 8-byte Folded Reload + j .LBB0_39 +.LBB0_67: # %gather_memory_references.exit.i # in Loop: Header=BB0_32 Depth=1 - ld s1, 336(sp) + sd s5, 136(sp) # 8-byte Folded Spill + ld s5, 336(sp) mv a0, s4 call free - beqz s1, .LBB0_178 -# %bb.69: # %for.body.i32.i.preheader + beqz s5, .LBB0_178 +# %bb.68: # %for.body.i32.i.preheader # in Loop: Header=BB0_32 Depth=1 - sd s5, 128(sp) # 8-byte Folded Spill - sd s1, 72(sp) # 8-byte Folded Spill - mv s2, s1 + sd s5, 80(sp) # 8-byte Folded Spill csrr a0, vlenb slli a0, a0, 2 add a0, sp, a0 @@ -681,170 +672,170 @@ add a0, sp, a0 addi a0, a0, 368 vl2r.v v28, (a0) # Unknown-size Folded Reload - j .LBB0_71 -.LBB0_70: # %prune_group_by_reuse.exit.i.i - # in Loop: Header=BB0_71 Depth=2 - ld s2, 24(s2) - beqz s2, .LBB0_157 -.LBB0_71: # %for.body.i32.i + j .LBB0_70 +.LBB0_69: # %prune_group_by_reuse.exit.i.i + # in Loop: Header=BB0_70 Depth=2 + ld s5, 24(s5) + beqz s5, .LBB0_157 +.LBB0_70: # %for.body.i32.i # Parent Loop BB0_32 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_73 Depth 3 - # Child Loop BB0_82 Depth 4 - # Child Loop BB0_92 Depth 5 - # Child Loop BB0_100 Depth 5 - # Child Loop BB0_103 Depth 6 - # Child Loop BB0_106 Depth 6 - # Child Loop BB0_131 Depth 5 - # Child Loop BB0_134 Depth 6 - # Child Loop BB0_137 Depth 6 - ld s5, 16(s2) - mv s4, s5 - bnez s5, .LBB0_73 - j .LBB0_70 -.LBB0_72: # %for.bodythread-pre-split.i.i.i - # in Loop: Header=BB0_73 Depth=3 - ld s5, 16(s2) -.LBB0_73: # %for.body.i.i.i + # Child Loop BB0_72 Depth 3 + # Child Loop BB0_81 Depth 4 + # Child Loop BB0_91 Depth 5 + # Child Loop BB0_99 Depth 5 + # Child Loop BB0_102 Depth 6 + # Child Loop BB0_105 Depth 6 + # Child Loop BB0_130 Depth 5 + # Child Loop BB0_133 Depth 6 + # Child Loop BB0_136 Depth 6 + ld s2, 16(s5) + mv s4, s2 + bnez s2, .LBB0_72 + j .LBB0_69 +.LBB0_71: # %for.bodythread-pre-split.i.i.i + # in Loop: Header=BB0_72 Depth=3 + ld s2, 16(s5) +.LBB0_72: # %for.body.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 + # Parent Loop BB0_70 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_82 Depth 4 - # Child Loop BB0_92 Depth 5 - # Child Loop BB0_100 Depth 5 - # Child Loop BB0_103 Depth 6 - # Child Loop BB0_106 Depth 6 - # Child Loop BB0_131 Depth 5 - # Child Loop BB0_134 Depth 6 - # Child Loop BB0_137 Depth 6 + # Child Loop BB0_81 Depth 4 + # Child Loop BB0_91 Depth 5 + # Child Loop BB0_99 Depth 5 + # Child Loop BB0_102 Depth 6 + # Child Loop BB0_105 Depth 6 + # Child Loop BB0_130 Depth 5 + # Child Loop BB0_133 Depth 6 + # Child Loop BB0_136 Depth 6 ld a0, 24(s4) ld a0, 8(a0) - beqz a0, .LBB0_76 -# %bb.74: # %if.end.i.i.i.i.i - # in Loop: Header=BB0_73 Depth=3 + beqz a0, .LBB0_75 +# %bb.73: # %if.end.i.i.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a1, 0(s10) addi a1, a1, 2047 lw a1, 1161(a1) srai a2, a0, 63 xor a0, a0, a2 sub a0, a0, a2 - blt a1, a0, .LBB0_78 -# %bb.75: # %if.end11.i.i.i.i.i - # in Loop: Header=BB0_73 Depth=3 + blt a1, a0, .LBB0_77 +# %bb.74: # %if.end11.i.i.i.i.i + # in Loop: Header=BB0_72 Depth=3 divuw a0, a1, a0 slli a0, a0, 32 srli a0, a0, 32 li a1, 32 - j .LBB0_77 -.LBB0_76: # in Loop: Header=BB0_73 Depth=3 + j .LBB0_76 +.LBB0_75: # in Loop: Header=BB0_72 Depth=3 li a0, 1 li a1, 40 -.LBB0_77: # %cleanup.sink.split.i.i.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_76: # %cleanup.sink.split.i.i.i.i.i + # in Loop: Header=BB0_72 Depth=3 add a1, s4, a1 sd a0, 0(a1) -.LBB0_78: # %prune_ref_by_self_reuse.exit.i.i.i.i - # in Loop: Header=BB0_73 Depth=3 - beqz s5, .LBB0_140 -# %bb.79: # %for.body.lr.ph.i.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_77: # %prune_ref_by_self_reuse.exit.i.i.i.i + # in Loop: Header=BB0_72 Depth=3 + beqz s2, .LBB0_139 +# %bb.78: # %for.body.lr.ph.i.i.i.i + # in Loop: Header=BB0_72 Depth=3 li s7, 1 - j .LBB0_82 -.LBB0_80: # in Loop: Header=BB0_82 Depth=4 + j .LBB0_81 +.LBB0_79: # in Loop: Header=BB0_81 Depth=4 li a0, 0 -.LBB0_81: # %for.inc.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - ld s5, 56(s5) +.LBB0_80: # %for.inc.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + ld s2, 56(s2) mv s7, a0 - beqz s5, .LBB0_140 -.LBB0_82: # %for.body.i.i.i.i + beqz s2, .LBB0_139 +.LBB0_81: # %for.body.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 # => This Loop Header: Depth=4 - # Child Loop BB0_92 Depth 5 - # Child Loop BB0_100 Depth 5 - # Child Loop BB0_103 Depth 6 - # Child Loop BB0_106 Depth 6 - # Child Loop BB0_131 Depth 5 - # Child Loop BB0_134 Depth 6 - # Child Loop BB0_137 Depth 6 - beq s5, s4, .LBB0_80 -# %bb.83: # %if.end.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + # Child Loop BB0_91 Depth 5 + # Child Loop BB0_99 Depth 5 + # Child Loop BB0_102 Depth 6 + # Child Loop BB0_105 Depth 6 + # Child Loop BB0_130 Depth 5 + # Child Loop BB0_133 Depth 6 + # Child Loop BB0_136 Depth 6 + beq s2, s4, .LBB0_79 +# %bb.82: # %if.end.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 lbu a0, 64(s4) andi a0, a0, 1 - bnez a0, .LBB0_85 -# %bb.84: # %land.lhs.true.i.i.i36.i - # in Loop: Header=BB0_82 Depth=4 - lbu a0, 64(s5) + bnez a0, .LBB0_84 +# %bb.83: # %land.lhs.true.i.i.i36.i + # in Loop: Header=BB0_81 Depth=4 + lbu a0, 64(s2) andi a0, a0, 1 - bnez a0, .LBB0_109 -.LBB0_85: # %if.end8.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bnez a0, .LBB0_108 +.LBB0_84: # %if.end8.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a2, 16(s4) - ld a3, 16(s5) + ld a3, 16(s2) sub a4, a3, a2 - beqz a4, .LBB0_107 -# %bb.86: # %if.end9.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + beqz a4, .LBB0_106 +# %bb.85: # %if.end9.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a0, 24(s4) ld a0, 8(a0) - beqz a0, .LBB0_110 -# %bb.87: # %if.end27.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - bltz a0, .LBB0_115 -# %bb.88: # %if.else.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - bltz a4, .LBB0_109 -# %bb.89: # %if.else.if.end50_crit_edge.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + beqz a0, .LBB0_109 +# %bb.86: # %if.end27.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + bltz a0, .LBB0_114 +# %bb.87: # %if.else.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + bltz a4, .LBB0_108 +# %bb.88: # %if.else.if.end50_crit_edge.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a1, 0(s10) addi a1, a1, 2047 lw a1, 1161(a1) - bge a1, a0, .LBB0_117 -.LBB0_90: # %while.cond.preheader.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bge a1, a0, .LBB0_116 +.LBB0_89: # %while.cond.preheader.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 mv a3, a1 li a2, 2 - bltu a1, a2, .LBB0_94 -# %bb.91: # %while.cond.preheader.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu a1, a2, .LBB0_93 +# %bb.90: # %while.cond.preheader.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 andi a5, a0, 1 mv a3, a1 mv a2, a0 - bnez a5, .LBB0_94 -.LBB0_92: # %while.body.i.i.i.i.i + bnez a5, .LBB0_93 +.LBB0_91: # %while.body.i.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 # => This Inner Loop Header: Depth=5 mv a5, a3 andi a6, a2, 2 srli a3, a3, 1 - bnez a6, .LBB0_94 -# %bb.93: # %while.body.i.i.i.i.i - # in Loop: Header=BB0_92 Depth=5 + bnez a6, .LBB0_93 +# %bb.92: # %while.body.i.i.i.i.i + # in Loop: Header=BB0_91 Depth=5 srai a2, a2, 1 - bltu s6, a5, .LBB0_92 -.LBB0_94: # %while.end.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu s6, a5, .LBB0_91 +.LBB0_93: # %while.end.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 div a2, a4, a0 - beqz a1, .LBB0_118 -# %bb.95: # %for.cond2.preheader.lr.ph.i.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - beqz a3, .LBB0_118 -# %bb.96: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader - # in Loop: Header=BB0_82 Depth=4 + beqz a1, .LBB0_117 +# %bb.94: # %for.cond2.preheader.lr.ph.i.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + beqz a3, .LBB0_117 +# %bb.95: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader + # in Loop: Header=BB0_81 Depth=4 li a5, 8 mv a7, s0 - bltu a5, s0, .LBB0_98 -# %bb.97: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader - # in Loop: Header=BB0_82 Depth=4 + bltu a5, s0, .LBB0_97 +# %bb.96: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader + # in Loop: Header=BB0_81 Depth=4 li a7, 8 -.LBB0_98: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader - # in Loop: Header=BB0_82 Depth=4 +.LBB0_97: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader + # in Loop: Header=BB0_81 Depth=4 ld a5, 8(s4) ld a5, 16(a5) li t1, 0 @@ -861,32 +852,33 @@ snez a4, a4 or a4, a6, a4 srli a5, a5, 3 - j .LBB0_100 -.LBB0_99: # %for.cond2.for.inc22_crit_edge.us.i.i.i.i.i.i - # in Loop: Header=BB0_100 Depth=5 + j .LBB0_99 +.LBB0_98: # %for.cond2.for.inc22_crit_edge.us.i.i.i.i.i.i + # in Loop: Header=BB0_99 Depth=5 add t1, t1, a5 slli t4, t1, 32 srli t4, t4, 32 - bgeu t4, a1, .LBB0_125 -.LBB0_100: # %for.cond2.preheader.us.i.i.i.i.i.i + bgeu t4, a1, .LBB0_124 +.LBB0_99: # %for.cond2.preheader.us.i.i.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 # => This Loop Header: Depth=5 - # Child Loop BB0_103 Depth 6 - # Child Loop BB0_106 Depth 6 + # Child Loop BB0_102 Depth 6 + # Child Loop BB0_105 Depth 6 sext.w a6, a3 sltu t4, a6, a7 or t4, t4, a4 - beqz t4, .LBB0_102 -# %bb.101: # in Loop: Header=BB0_100 Depth=5 + beqz t4, .LBB0_101 +# %bb.100: # in Loop: Header=BB0_99 Depth=5 li t4, 0 - j .LBB0_105 -.LBB0_102: # %vector.ph139 - # in Loop: Header=BB0_100 Depth=5 + j .LBB0_104 +.LBB0_101: # %vector.ph139 + # in Loop: Header=BB0_99 Depth=5 srli t5, s11, 1 neg t4, t5 + and t4, t4, a3 vsetvli t6, zero, e32, m1, tu, ma vmv1r.v v12, v26 vmv.s.x v12, t2 @@ -894,62 +886,59 @@ vmv1r.v v13, v26 vmv.s.x v13, t3 vmv2r.v v10, v28 - and t4, t4, a3 vmv1r.v v8, v12 vmv1r.v v10, v13 - vsetvli t2, zero, e32, m2, ta, ma - vmv.v.x v12, t1 mv t2, t4 - vmv2r.v v14, v24 -.LBB0_103: # %vector.body144 + vmv2r.v v12, v24 +.LBB0_102: # %vector.body144 # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 - # Parent Loop BB0_100 Depth=5 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 + # Parent Loop BB0_99 Depth=5 # => This Inner Loop Header: Depth=6 - vmv2r.v v16, v14 - vmadd.vx v16, a0, v12 + vsetvli t3, zero, e32, m2, ta, ma + vmv.v.x v14, t1 + vmacc.vx v14, a0, v12 vsetvli zero, zero, e64, m4, ta, ma - vsext.vf2 v20, v16 + vsext.vf2 v16, v14 vsetvli zero, zero, e32, m2, ta, ma - vadd.vx v16, v16, t0 + vadd.vx v14, v14, t0 vsetvli zero, zero, e64, m4, ta, ma - vdivu.vx v20, v20, a1 + vdivu.vx v16, v16, a1 vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v18, v20, 0 + vnsrl.wi v20, v16, 0 vsetvli zero, zero, e64, m4, ta, ma - vsext.vf2 v20, v16 - vdivu.vx v20, v20, a1 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v16, v20, 0 - vmsne.vv v0, v18, v16 + vsext.vf2 v16, v14 + vdivu.vx v16, v16, a1 + vsetvli zero, zero, e32, m2, ta, mu + vnsrl.wi v14, v16, 0 + vmsne.vv v0, v20, v14 vadd.vi v8, v8, 1 - vmerge.vim v16, v28, 1, v0 - vadd.vv v10, v10, v16 + vadd.vi v10, v10, 1, v0.t subw t2, t2, s0 - vadd.vx v14, v14, t5 - bnez t2, .LBB0_103 -# %bb.104: # %middle.block136 - # in Loop: Header=BB0_100 Depth=5 + vadd.vx v12, v12, t5 + bnez t2, .LBB0_102 +# %bb.103: # %middle.block136 + # in Loop: Header=BB0_99 Depth=5 vmv.s.x v12, zero vredsum.vs v8, v8, v12 vmv.x.s t2, v8 vredsum.vs v8, v10, v12 sext.w t5, t4 vmv.x.s t3, v8 - beq t5, a6, .LBB0_99 -.LBB0_105: # %for.body6.us.i.i.i.i.i.i.preheader - # in Loop: Header=BB0_100 Depth=5 + beq t5, a6, .LBB0_98 +.LBB0_104: # %for.body6.us.i.i.i.i.i.i.preheader + # in Loop: Header=BB0_99 Depth=5 addi t5, t4, 1 mul t4, a0, t4 addw t4, t1, t4 -.LBB0_106: # %for.body6.us.i.i.i.i.i.i +.LBB0_105: # %for.body6.us.i.i.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 - # Parent Loop BB0_100 Depth=5 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 + # Parent Loop BB0_99 Depth=5 # => This Inner Loop Header: Depth=6 addw t6, t0, t4 divu s1, t4, a1 @@ -964,46 +953,46 @@ srli t6, t6, 32 addi t5, t5, 1 addw t4, t4, a0 - bltu t6, a3, .LBB0_106 - j .LBB0_99 -.LBB0_107: # %if.then.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu t6, a3, .LBB0_105 + j .LBB0_98 +.LBB0_106: # %if.then.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 li a2, 0 li a0, 0 - beqz s7, .LBB0_81 -.LBB0_108: # %for.inc.sink.split.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + beqz s7, .LBB0_80 +.LBB0_107: # %for.inc.sink.split.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 sd a2, 40(s4) -.LBB0_109: # %for.inc.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 +.LBB0_108: # %for.inc.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 mv a0, s7 - j .LBB0_81 -.LBB0_110: # %if.then11.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - beqz s7, .LBB0_80 -# %bb.111: # %if.end14.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + j .LBB0_80 +.LBB0_109: # %if.then11.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + beqz s7, .LBB0_79 +# %bb.110: # %if.end14.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a0, 0(s10) addi a0, a0, 2047 lw a0, 1161(a0) - beqz a0, .LBB0_119 + beqz a0, .LBB0_118 +# %bb.111: # %ddown.exit59.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + bltz a2, .LBB0_121 # %bb.112: # %ddown.exit59.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - bltz a2, .LBB0_122 -# %bb.113: # %ddown.exit59.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - bltz a3, .LBB0_123 -.LBB0_114: # %ddown.exit59.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + # in Loop: Header=BB0_81 Depth=4 + bltz a3, .LBB0_122 +.LBB0_113: # %ddown.exit59.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 divu a1, a2, a0 divu a0, a3, a0 - bne a1, a0, .LBB0_109 - j .LBB0_124 -.LBB0_115: # %if.then29.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - bgtz a4, .LBB0_109 -# %bb.116: # %if.end33.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bne a1, a0, .LBB0_108 + j .LBB0_123 +.LBB0_114: # %if.then29.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + bgtz a4, .LBB0_108 +# %bb.115: # %if.end33.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a1, 0(s10) addi a1, a1, 2047 lw a1, 1161(a1) @@ -1012,9 +1001,9 @@ addiw a5, a1, -1 sub a2, a5, a2 sub a3, a5, a3 - blt a1, a0, .LBB0_90 -.LBB0_117: # %ddown.exit67.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + blt a1, a0, .LBB0_89 +.LBB0_116: # %ddown.exit67.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 addi a4, a1, -1 srai a5, a3, 63 and a4, a5, a4 @@ -1026,15 +1015,15 @@ add a2, a2, a3 sub a2, a2, a1 div a2, a2, a0 - bltu a2, a4, .LBB0_108 - j .LBB0_109 -.LBB0_118: # in Loop: Header=BB0_82 Depth=4 + bltu a2, a4, .LBB0_107 + j .LBB0_108 +.LBB0_117: # in Loop: Header=BB0_81 Depth=4 divw a0, zero, zero li a1, 50 - blt a1, a0, .LBB0_109 - j .LBB0_139 -.LBB0_119: # %ddown.exit.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + blt a1, a0, .LBB0_108 + j .LBB0_138 +.LBB0_118: # %ddown.exit.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 .Lpcrel_hi31: auipc a0, %pcrel_hi(.L.str.11) addi a0, a0, %pcrel_lo(.Lpcrel_hi31) @@ -1046,9 +1035,9 @@ ld a0, 0(s10) addi a0, a0, 2047 lw a0, 1161(a0) - bnez a0, .LBB0_121 -# %bb.120: # %cond.true.i58.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bnez a0, .LBB0_120 +# %bb.119: # %cond.true.i58.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 .Lpcrel_hi33: auipc a0, %pcrel_hi(.L.str.11) addi a0, a0, %pcrel_lo(.Lpcrel_hi33) @@ -1057,7 +1046,7 @@ addi a2, a1, %pcrel_lo(.Lpcrel_hi34) li a1, 588 call fancy_abort -.LBB0_121: # in Loop: Header=BB0_82 Depth=4 +.LBB0_120: # in Loop: Header=BB0_81 Depth=4 li a2, 0 csrr a0, vlenb slli a0, a0, 2 @@ -1074,69 +1063,69 @@ add a0, sp, a0 addi a0, a0, 368 vl2r.v v28, (a0) # Unknown-size Folded Reload - j .LBB0_108 -.LBB0_122: # in Loop: Header=BB0_82 Depth=4 + j .LBB0_107 +.LBB0_121: # in Loop: Header=BB0_81 Depth=4 add a2, a2, a0 addi a2, a2, -1 - bgez a3, .LBB0_114 -.LBB0_123: # in Loop: Header=BB0_82 Depth=4 + bgez a3, .LBB0_113 +.LBB0_122: # in Loop: Header=BB0_81 Depth=4 add a3, a3, a0 addi a3, a3, -1 divu a1, a2, a0 divu a0, a3, a0 - bne a1, a0, .LBB0_109 -.LBB0_124: # in Loop: Header=BB0_82 Depth=4 + bne a1, a0, .LBB0_108 +.LBB0_123: # in Loop: Header=BB0_81 Depth=4 li a2, 0 - j .LBB0_108 -.LBB0_125: # %compute_miss_rate.exit.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + j .LBB0_107 +.LBB0_124: # %compute_miss_rate.exit.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 li a7, 1000 mul a7, t3, a7 divw a7, a7, t2 li t1, 50 - blt t1, a7, .LBB0_127 -# %bb.126: # %if.then86.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + blt t1, a7, .LBB0_126 +# %bb.125: # %if.then86.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a0, 40(s4) - bltu a2, a0, .LBB0_108 - j .LBB0_109 -.LBB0_127: # %if.end93.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu a2, a0, .LBB0_107 + j .LBB0_108 +.LBB0_126: # %if.end93.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 srli a7, s11, 1 li t1, 8 - bltu t1, a7, .LBB0_129 -# %bb.128: # %if.end93.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu t1, a7, .LBB0_128 +# %bb.127: # %if.end93.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 li a7, 8 -.LBB0_129: # %if.end93.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 +.LBB0_128: # %if.end93.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 li t1, 0 li t2, 0 li t3, 0 sub t0, a0, t0 - j .LBB0_131 -.LBB0_130: # %for.cond2.for.inc22_crit_edge.us.i94.i.i.i.i.i - # in Loop: Header=BB0_131 Depth=5 + j .LBB0_130 +.LBB0_129: # %for.cond2.for.inc22_crit_edge.us.i94.i.i.i.i.i + # in Loop: Header=BB0_130 Depth=5 add t1, t1, a5 slli t4, t1, 32 srli t4, t4, 32 - bgeu t4, a1, .LBB0_138 -.LBB0_131: # %for.cond2.preheader.us.i71.i.i.i.i.i + bgeu t4, a1, .LBB0_137 +.LBB0_130: # %for.cond2.preheader.us.i71.i.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 # => This Loop Header: Depth=5 - # Child Loop BB0_134 Depth 6 - # Child Loop BB0_137 Depth 6 + # Child Loop BB0_133 Depth 6 + # Child Loop BB0_136 Depth 6 sltu t4, a6, a7 or t4, t4, a4 - beqz t4, .LBB0_133 -# %bb.132: # in Loop: Header=BB0_131 Depth=5 + beqz t4, .LBB0_132 +# %bb.131: # in Loop: Header=BB0_130 Depth=5 li t4, 0 - j .LBB0_136 -.LBB0_133: # %vector.ph115 - # in Loop: Header=BB0_131 Depth=5 + j .LBB0_135 +.LBB0_132: # %vector.ph115 + # in Loop: Header=BB0_130 Depth=5 srli t5, s11, 1 neg t4, t5 vsetvli t6, zero, e32, m1, tu, ma @@ -1153,13 +1142,14 @@ vmv.v.x v12, t1 mv t2, t4 vmv2r.v v14, v24 -.LBB0_134: # %vector.body120 +.LBB0_133: # %vector.body120 # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 - # Parent Loop BB0_131 Depth=5 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 + # Parent Loop BB0_130 Depth=5 # => This Inner Loop Header: Depth=6 + vsetvli zero, zero, e32, m2, ta, ma vmv2r.v v16, v14 vmadd.vx v16, a0, v12 vsetvli zero, zero, e64, m4, ta, ma @@ -1173,35 +1163,34 @@ vsetvli zero, zero, e64, m4, ta, ma vsext.vf2 v20, v16 vdivu.vx v20, v20, a1 - vsetvli zero, zero, e32, m2, ta, ma + vsetvli zero, zero, e32, m2, ta, mu vnsrl.wi v16, v20, 0 vmsne.vv v0, v18, v16 vadd.vi v8, v8, 1 - vmerge.vim v16, v28, 1, v0 - vadd.vv v10, v10, v16 + vadd.vi v10, v10, 1, v0.t subw t2, t2, s0 vadd.vx v14, v14, t5 - bnez t2, .LBB0_134 -# %bb.135: # %middle.block112 - # in Loop: Header=BB0_131 Depth=5 + bnez t2, .LBB0_133 +# %bb.134: # %middle.block112 + # in Loop: Header=BB0_130 Depth=5 vmv.s.x v12, zero vredsum.vs v8, v8, v12 vmv.x.s t2, v8 vredsum.vs v8, v10, v12 sext.w t5, t4 vmv.x.s t3, v8 - beq t5, a6, .LBB0_130 -.LBB0_136: # %for.body6.us.i75.i.i.i.i.i.preheader - # in Loop: Header=BB0_131 Depth=5 + beq t5, a6, .LBB0_129 +.LBB0_135: # %for.body6.us.i75.i.i.i.i.i.preheader + # in Loop: Header=BB0_130 Depth=5 addi t5, t4, 1 mul t4, a0, t4 addw t4, t1, t4 -.LBB0_137: # %for.body6.us.i75.i.i.i.i.i +.LBB0_136: # %for.body6.us.i75.i.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 - # Parent Loop BB0_131 Depth=5 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 + # Parent Loop BB0_130 Depth=5 # => This Inner Loop Header: Depth=6 addw t6, t0, t4 divu s1, t4, a1 @@ -1216,78 +1205,78 @@ srli t6, t6, 32 addi t5, t5, 1 addw t4, t4, a0 - bltu t6, a3, .LBB0_137 - j .LBB0_130 -.LBB0_138: # %for.end24.loopexit22.i98.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu t6, a3, .LBB0_136 + j .LBB0_129 +.LBB0_137: # %for.end24.loopexit22.i98.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 li a0, 1000 mul a0, t3, a0 divw a0, a0, t2 li a1, 50 - blt a1, a0, .LBB0_109 -.LBB0_139: # %if.then98.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + blt a1, a0, .LBB0_108 +.LBB0_138: # %if.then98.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a0, 40(s4) addi a2, a2, 1 - bltu a2, a0, .LBB0_108 - j .LBB0_109 -.LBB0_140: # %prune_ref_by_reuse.exit.i.i.i - # in Loop: Header=BB0_73 Depth=3 + bltu a2, a0, .LBB0_107 + j .LBB0_108 +.LBB0_139: # %prune_ref_by_reuse.exit.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a0, 0(s3) - beqz a0, .LBB0_156 -# %bb.141: # %land.lhs.true.i.i.i12 - # in Loop: Header=BB0_73 Depth=3 + beqz a0, .LBB0_155 +# %bb.140: # %land.lhs.true.i.i.i12 + # in Loop: Header=BB0_72 Depth=3 .Lpcrel_hi35: auipc a1, %got_pcrel_hi(dump_flags) ld a1, %pcrel_lo(.Lpcrel_hi35)(a1) lbu a1, 0(a1) andi a1, a1, 8 - beqz a1, .LBB0_156 -# %bb.142: # %if.then.i.i.i - # in Loop: Header=BB0_73 Depth=3 + beqz a1, .LBB0_155 +# %bb.141: # %if.then.i.i.i + # in Loop: Header=BB0_72 Depth=3 .Lpcrel_hi36: auipc a1, %pcrel_hi(.L.str.27) addi a1, a1, %pcrel_lo(.Lpcrel_hi36) mv a2, s4 call fprintf ld a0, 40(s4) - beqz a0, .LBB0_147 -# %bb.143: # %if.then.i.i.i - # in Loop: Header=BB0_73 Depth=3 + beqz a0, .LBB0_146 +# %bb.142: # %if.then.i.i.i + # in Loop: Header=BB0_72 Depth=3 addi s1, s4, 32 li a1, -1 - bne a0, a1, .LBB0_149 -# %bb.144: # %land.lhs.true4.i.i.i - # in Loop: Header=BB0_73 Depth=3 + bne a0, a1, .LBB0_148 +# %bb.143: # %land.lhs.true4.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a0, 32(s4) - beq a0, a1, .LBB0_153 -# %bb.145: # %land.lhs.true4.i.i.i - # in Loop: Header=BB0_73 Depth=3 + beq a0, a1, .LBB0_152 +# %bb.144: # %land.lhs.true4.i.i.i + # in Loop: Header=BB0_72 Depth=3 li a1, 1 - bne a0, a1, .LBB0_151 -# %bb.146: # %if.then6.i.i.i - # in Loop: Header=BB0_73 Depth=3 + bne a0, a1, .LBB0_150 +# %bb.145: # %if.then6.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a3, 0(s3) .Lpcrel_hi37: auipc a0, %pcrel_hi(.L.str.28) addi a0, a0, %pcrel_lo(.Lpcrel_hi37) - j .LBB0_148 -.LBB0_147: # %if.then10.i.i.i - # in Loop: Header=BB0_73 Depth=3 + j .LBB0_147 +.LBB0_146: # %if.then10.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a3, 0(s3) .Lpcrel_hi38: auipc a0, %pcrel_hi(.L.str.29) addi a0, a0, %pcrel_lo(.Lpcrel_hi38) -.LBB0_148: # %if.end34.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_147: # %if.end34.i.i.i + # in Loop: Header=BB0_72 Depth=3 li a1, 16 - j .LBB0_154 -.LBB0_149: # %if.else12.thread.i.i.i - # in Loop: Header=BB0_73 Depth=3 + j .LBB0_153 +.LBB0_148: # %if.else12.thread.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a1, 32(s4) - bgeu a1, a0, .LBB0_153 -# %bb.150: # %if.then21.i.i.i - # in Loop: Header=BB0_73 Depth=3 + bgeu a1, a0, .LBB0_152 +# %bb.149: # %if.then21.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a3, 0(s3) .Lpcrel_hi40: auipc a0, %pcrel_hi(.L.str.31) @@ -1302,12 +1291,12 @@ addi a1, a1, %pcrel_lo(.Lpcrel_hi41) call fprintf ld a0, 32(s4) -.LBB0_151: # %if.end.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_150: # %if.end.i.i.i + # in Loop: Header=BB0_72 Depth=3 li a1, 1 - beq a0, a1, .LBB0_155 -# %bb.152: # %if.then27.i.i.i - # in Loop: Header=BB0_73 Depth=3 + beq a0, a1, .LBB0_154 +# %bb.151: # %if.then27.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a3, 0(s3) .Lpcrel_hi42: auipc a0, %pcrel_hi(.L.str.32) @@ -1321,20 +1310,20 @@ auipc a1, %pcrel_hi(.L.str.21) addi a1, a1, %pcrel_lo(.Lpcrel_hi43) call fprintf - j .LBB0_155 -.LBB0_153: # %if.then16.i.i.i - # in Loop: Header=BB0_73 Depth=3 + j .LBB0_154 +.LBB0_152: # %if.then16.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a3, 0(s3) .Lpcrel_hi39: auipc a0, %pcrel_hi(.L.str.30) addi a0, a0, %pcrel_lo(.Lpcrel_hi39) li a1, 14 -.LBB0_154: # %if.end34.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_153: # %if.end34.i.i.i + # in Loop: Header=BB0_72 Depth=3 li a2, 1 call fwrite -.LBB0_155: # %if.end34.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_154: # %if.end34.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a1, 0(s3) li a0, 10 call fputc @@ -1353,18 +1342,27 @@ add a0, sp, a0 addi a0, a0, 368 vl2r.v v28, (a0) # Unknown-size Folded Reload -.LBB0_156: # %for.inc.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_155: # %for.inc.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld s4, 56(s4) - bnez s4, .LBB0_72 - j .LBB0_70 + bnez s4, .LBB0_71 + j .LBB0_69 +.LBB0_156: # %if.then3.i + # in Loop: Header=BB0_32 Depth=1 +.Lpcrel_hi23: + auipc a0, %pcrel_hi(.L.str.15) + addi a0, a0, %pcrel_lo(.Lpcrel_hi23) + li a1, 22 + li a2, 1 + call fwrite + li s1, 0 + j .LBB0_381 .LBB0_157: # %for.body.i37.i.preheader # in Loop: Header=BB0_32 Depth=1 - li a5, 0 + li a3, 0 + ld a0, 80(sp) # 8-byte Folded Reload ld s5, 72(sp) # 8-byte Folded Reload - mv a0, s5 - li a3, -1 - ld a4, 240(sp) # 8-byte Folded Reload + li s1, -1 j .LBB0_159 .LBB0_158: # %for.inc5.i.i # in Loop: Header=BB0_159 Depth=2 @@ -1386,29 +1384,30 @@ # Parent Loop BB0_159 Depth=2 # => This Inner Loop Header: Depth=3 ld a2, 40(a1) - bne a2, a3, .LBB0_160 + bne a2, s1, .LBB0_160 # %bb.162: # %should_issue_prefetch_p.exit.i.i # in Loop: Header=BB0_161 Depth=3 lbu a2, 64(a1) andi a2, a2, 8 bnez a2, .LBB0_160 # %bb.163: # in Loop: Header=BB0_161 Depth=3 - addiw a5, a5, 1 + addiw a3, a3, 1 j .LBB0_160 .LBB0_164: # %estimate_prefetch_count.exit.i # in Loop: Header=BB0_32 Depth=1 - ld s4, 136(sp) # 8-byte Folded Reload - beqz a5, .LBB0_179 + ld s4, 144(sp) # 8-byte Folded Reload + beqz a3, .LBB0_179 # %bb.165: # %if.end9.i # in Loop: Header=BB0_32 Depth=1 - sd a5, 24(sp) # 8-byte Folded Spill - li s8, -1 - ld a0, 48(a4) + sd a3, 24(sp) # 8-byte Folded Spill + ld a0, 240(sp) # 8-byte Folded Reload + ld a0, 48(a0) sd zero, 336(sp) sd zero, 328(sp) beqz a0, .LBB0_190 .LBB0_166: # %determine_loop_nest_reuse.exit.i # in Loop: Header=BB0_32 Depth=1 + li s2, -1 .Lpcrel_hi51: auipc a0, %got_pcrel_hi(eni_time_weights) ld a1, %pcrel_lo(.Lpcrel_hi51)(a0) @@ -1417,7 +1416,8 @@ call tree_num_loop_insns ld a1, 0(s10) addi a1, a1, 2047 - lw s2, 1065(a1) + lw a1, 1065(a1) + sd a1, 208(sp) # 8-byte Folded Spill mv s9, a0 mv a0, s1 li a1, 0 @@ -1430,7 +1430,7 @@ call tree_num_loop_insns ld a1, 0(s10) lw a1, 840(a1) - mv a5, a0 + mv s8, a0 divuw a0, a1, a0 slti a1, s7, 0 not a1, a1 @@ -1451,9 +1451,8 @@ bltu s1, a0, .LBB0_183 # %bb.169: # %for.body.i83.i.preheader # in Loop: Header=BB0_32 Depth=1 - sd a5, 208(sp) # 8-byte Folded Spill li s5, 1 - ld s3, 72(sp) # 8-byte Folded Reload + ld s3, 80(sp) # 8-byte Folded Reload j .LBB0_171 .LBB0_170: # %for.inc21.i.i # in Loop: Header=BB0_171 Depth=2 @@ -1475,7 +1474,7 @@ # Parent Loop BB0_171 Depth=2 # => This Inner Loop Header: Depth=3 ld a0, 40(s4) - bne a0, s8, .LBB0_172 + bne a0, s2, .LBB0_172 # %bb.174: # %should_issue_prefetch_p.exit.i93.i # in Loop: Header=BB0_173 Depth=3 lbu a0, 64(s4) @@ -1496,14 +1495,16 @@ mv a0, s4 call free li s1, 0 - ld s4, 136(sp) # 8-byte Folded Reload - ld a0, 160(sp) # 8-byte Folded Reload + ld s4, 144(sp) # 8-byte Folded Reload j .LBB0_381 .LBB0_178: # in Loop: Header=BB0_32 Depth=1 - ld s4, 136(sp) # 8-byte Folded Reload + li s1, 0 + ld s4, 144(sp) # 8-byte Folded Reload + ld s5, 72(sp) # 8-byte Folded Reload j .LBB0_380 .LBB0_179: # in Loop: Header=BB0_32 Depth=1 li s1, 0 + ld s3, 80(sp) # 8-byte Folded Reload j .LBB0_377 .LBB0_180: # %for.end23.i.i # in Loop: Header=BB0_32 Depth=1 @@ -1511,9 +1512,8 @@ ld a0, 240(sp) # 8-byte Folded Reload mv a1, s5 call can_unroll_loop_p - ld s3, 152(sp) # 8-byte Folded Reload - ld s4, 136(sp) # 8-byte Folded Reload - ld a5, 208(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload + ld s4, 144(sp) # 8-byte Folded Reload beqz a0, .LBB0_182 # %bb.181: # %should_unroll_loop_p.exit.i.i # in Loop: Header=BB0_32 Depth=1 @@ -1526,6 +1526,7 @@ .LBB0_183: # %determine_unroll_factor.exit.i # in Loop: Header=BB0_32 Depth=1 ld a0, 0(s3) + ld s2, 208(sp) # 8-byte Folded Reload add s2, s9, s2 addi s2, s2, -1 divuw s9, s2, s9 @@ -1547,11 +1548,10 @@ mv a2, s9 mv a3, s5 mv a4, s7 - mv s2, a5 + mv a5, s8 mv a6, s1 ld a7, 24(sp) # 8-byte Folded Reload call fprintf - mv a5, s2 .LBB0_186: # %if.end20.i # in Loop: Header=BB0_32 Depth=1 beqz s1, .LBB0_189 @@ -1560,14 +1560,15 @@ ld a0, 0(s10) addi a0, a0, 2047 lw a1, 1641(a0) - divuw a2, a5, s1 + divuw a2, s8, s1 bge a2, a1, .LBB0_307 .LBB0_188: # in Loop: Header=BB0_32 Depth=1 li s1, 0 .LBB0_189: # in Loop: Header=BB0_32 Depth=1 - ld s8, 216(sp) # 8-byte Folded Reload - ld s9, 80(sp) # 8-byte Folded Reload ld s5, 72(sp) # 8-byte Folded Reload + ld s8, 216(sp) # 8-byte Folded Reload + ld s9, 88(sp) # 8-byte Folded Reload + ld s3, 80(sp) # 8-byte Folded Reload j .LBB0_377 .LBB0_190: # %while.cond.preheader.i.i # in Loop: Header=BB0_32 Depth=1 @@ -1576,6 +1577,7 @@ ld a0, 32(a0) ld a0, 24(a0) ld a1, 240(sp) # 8-byte Folded Reload + ld s5, 136(sp) # 8-byte Folded Reload .LBB0_191: # %while.cond.i.i16 # Parent Loop BB0_32 Depth=1 # => This Inner Loop Header: Depth=2 @@ -1610,25 +1612,23 @@ mv a0, s8 call find_loop_nest ld a0, 328(sp) - li s1, -1 beqz a0, .LBB0_199 # %bb.198: # %cond.true.i.i.i20 # in Loop: Header=BB0_32 Depth=1 lw a0, 0(a0) .LBB0_199: # %VEC_loop_p_base_length.exit.i.i21 # in Loop: Header=BB0_32 Depth=1 - ld s5, 72(sp) # 8-byte Folded Reload sd a0, 208(sp) # 8-byte Folded Spill slli a0, a0, 32 srli a1, a0, 32 - sd a1, 144(sp) # 8-byte Folded Spill + sd a1, 152(sp) # 8-byte Folded Spill srli a0, a0, 30 call xmalloc ld a1, 0(s10) - sd a0, 88(sp) # 8-byte Folded Spill + sd a0, 96(sp) # 8-byte Folded Spill li a5, 0 addi a0, a1, 2047 - mv a1, s5 + ld a1, 80(sp) # 8-byte Folded Reload j .LBB0_201 .LBB0_200: # %for.inc7.i.i.i # in Loop: Header=BB0_201 Depth=2 @@ -1660,7 +1660,7 @@ j .LBB0_202 .LBB0_205: # %while.cond10.outer.i.i.preheader # in Loop: Header=BB0_32 Depth=1 - ld s1, 88(sp) # 8-byte Folded Reload + ld s1, 96(sp) # 8-byte Folded Reload addi s1, s1, -4 ld s2, 208(sp) # 8-byte Folded Reload .LBB0_206: # %while.cond10.outer.i.i @@ -1691,8 +1691,7 @@ # %bb.209: # %if.end19.i.i # in Loop: Header=BB0_206 Depth=2 sd s1, 200(sp) # 8-byte Folded Spill - mv s1, s5 - mv s5, a5 + mv s1, a5 ld a1, 328(sp) add a0, a1, a0 ld s7, 0(a0) @@ -1708,16 +1707,14 @@ srli a0, a0, 32 .LBB0_211: # %if.end33.i.i # in Loop: Header=BB0_206 Depth=2 - mulw a5, s5, a0 - mv s5, s1 + mulw a5, s1, a0 ld s1, 200(sp) # 8-byte Folded Reload j .LBB0_206 .LBB0_212: # %for.body.i58.i.loopexit # in Loop: Header=BB0_32 Depth=1 sd a5, 192(sp) # 8-byte Folded Spill li s7, 0 - mv s1, s5 - ld s5, 128(sp) # 8-byte Folded Reload + ld s1, 80(sp) # 8-byte Folded Reload j .LBB0_214 .LBB0_213: # %for.inc51.i.i # in Loop: Header=BB0_214 Depth=2 @@ -1791,9 +1788,9 @@ li a3, 0 addi a0, s7, 8 sd a0, 48(sp) # 8-byte Folded Spill - ld a2, 144(sp) # 8-byte Folded Reload + ld a2, 152(sp) # 8-byte Folded Reload slli a0, a2, 2 - ld a1, 88(sp) # 8-byte Folded Reload + ld a1, 96(sp) # 8-byte Folded Reload add a0, a1, a0 addi a0, a0, -4 sd a0, 40(sp) # 8-byte Folded Spill @@ -1812,17 +1809,17 @@ # Child Loop BB0_230 Depth 4 # Child Loop BB0_245 Depth 4 # Child Loop BB0_257 Depth 3 - sd s5, 128(sp) # 8-byte Folded Spill - sd a3, 112(sp) # 8-byte Folded Spill + sd s5, 136(sp) # 8-byte Folded Spill + sd a3, 120(sp) # 8-byte Folded Spill slli a0, a3, 3 ld a1, 48(sp) # 8-byte Folded Reload add a0, a1, a0 ld s1, 0(a0) ld s3, 8(s1) li a1, 8 - ld a0, 144(sp) # 8-byte Folded Reload + ld a0, 152(sp) # 8-byte Folded Reload call xcalloc - sd s1, 104(sp) # 8-byte Folded Spill + sd s1, 112(sp) # 8-byte Folded Spill ld a1, 80(s1) sd a0, 200(sp) # 8-byte Folded Spill ld a4, 232(sp) # 8-byte Folded Reload @@ -2027,9 +2024,9 @@ addi a2, a2, -8 ld a3, 32(sp) # 8-byte Folded Reload ld a4, 40(sp) # 8-byte Folded Reload - ld s3, 152(sp) # 8-byte Folded Reload - ld s5, 128(sp) # 8-byte Folded Reload - ld s4, 104(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload + ld s5, 136(sp) # 8-byte Folded Reload + ld s4, 112(sp) # 8-byte Folded Reload j .LBB0_257 .LBB0_256: # %cleanup.i.i.i # in Loop: Header=BB0_257 Depth=3 @@ -2059,9 +2056,9 @@ j .LBB0_261 .LBB0_260: # in Loop: Header=BB0_225 Depth=2 li s1, -1 - ld s3, 152(sp) # 8-byte Folded Reload - ld s5, 128(sp) # 8-byte Folded Reload - ld s4, 104(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload + ld s5, 136(sp) # 8-byte Folded Reload + ld s4, 112(sp) # 8-byte Folded Reload .LBB0_261: # %self_reuse_distance.exit.i.i # in Loop: Header=BB0_225 Depth=2 ld a0, 200(sp) # 8-byte Folded Reload @@ -2074,8 +2071,8 @@ sw s1, 48(a0) .LBB0_263: # %if.end71.i.i # in Loop: Header=BB0_225 Depth=2 - ld s4, 136(sp) # 8-byte Folded Reload - ld a3, 112(sp) # 8-byte Folded Reload + ld s4, 144(sp) # 8-byte Folded Reload + ld a3, 120(sp) # 8-byte Folded Reload beqz s5, .LBB0_224 # %bb.264: # %if.then73.i.i # in Loop: Header=BB0_225 Depth=2 @@ -2099,13 +2096,13 @@ addiw t0, a6, -1 slli a1, t0, 32 srli t1, a1, 32 - sd t1, 128(sp) # 8-byte Folded Spill + sd t1, 136(sp) # 8-byte Folded Spill j .LBB0_268 .LBB0_267: # %for.inc208.i.i # in Loop: Header=BB0_268 Depth=2 ld a0, 336(sp) addiw a7, a7, 1 - ld s3, 152(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload beqz a0, .LBB0_301 .LBB0_268: # %land.lhs.true.i104.i.i # Parent Loop BB0_32 Depth=1 @@ -2181,7 +2178,7 @@ # %bb.279: # %for.body.i120.i.i.preheader # in Loop: Header=BB0_275 Depth=3 mv a2, a1 - ld a3, 144(sp) # 8-byte Folded Reload + ld a3, 152(sp) # 8-byte Folded Reload .LBB0_280: # %for.body.i120.i.i # Parent Loop BB0_32 Depth=1 # Parent Loop BB0_268 Depth=2 @@ -2273,7 +2270,7 @@ call fancy_abort ld t4, 168(sp) # 8-byte Folded Reload ld t3, 176(sp) # 8-byte Folded Reload - ld t1, 128(sp) # 8-byte Folded Reload + ld t1, 136(sp) # 8-byte Folded Reload mv t0, s9 ld a7, 200(sp) # 8-byte Folded Reload lw a2, 0(s5) @@ -2282,7 +2279,7 @@ # in Loop: Header=BB0_275 Depth=3 srli a1, s2, 32 slli a1, a1, 2 - ld a3, 88(sp) # 8-byte Folded Reload + ld a3, 96(sp) # 8-byte Folded Reload add a1, a3, a1 lw a3, 0(a1) add s1, a0, s1 @@ -2316,16 +2313,16 @@ sw s3, 48(s8) j .LBB0_267 .LBB0_300: # in Loop: Header=BB0_268 Depth=2 - ld s4, 136(sp) # 8-byte Folded Reload + ld s4, 144(sp) # 8-byte Folded Reload lw a0, 48(t3) bltu s3, a0, .LBB0_297 j .LBB0_298 -.LBB0_301: # in Loop: Header=BB0_32 Depth=1 - li s8, -1 +.LBB0_301: # %for.end210.i.i + # in Loop: Header=BB0_32 Depth=1 call free_dependence_relations mv a0, s7 call free_data_refs - ld a0, 88(sp) # 8-byte Folded Reload + ld a0, 96(sp) # 8-byte Folded Reload call free ld a3, 0(s3) beqz a3, .LBB0_166 @@ -2345,7 +2342,7 @@ li a1, 17 li a2, 1 call fwrite - ld s1, 72(sp) # 8-byte Folded Reload + ld s1, 80(sp) # 8-byte Folded Reload j .LBB0_305 .LBB0_304: # %for.inc228.i.i # in Loop: Header=BB0_305 Depth=2 @@ -2391,9 +2388,10 @@ ld a1, %pcrel_lo(.Lpcrel_hi55)(a1) lbu a1, 0(a1) andi a1, a1, 8 - ld s8, 216(sp) # 8-byte Folded Reload - ld s9, 80(sp) # 8-byte Folded Reload ld s5, 72(sp) # 8-byte Folded Reload + ld s8, 216(sp) # 8-byte Folded Reload + ld s9, 88(sp) # 8-byte Folded Reload + ld s3, 80(sp) # 8-byte Folded Reload bnez a1, .LBB0_319 # %bb.311: # in Loop: Header=BB0_32 Depth=1 li s1, 0 @@ -2402,12 +2400,14 @@ # in Loop: Header=BB0_32 Depth=1 lw a0, 1609(a0) ld a1, 24(sp) # 8-byte Folded Reload - divuw a1, a5, a1 + divuw a1, s8, a1 blt a1, a0, .LBB0_188 .LBB0_313: # %if.end24.i # in Loop: Header=BB0_32 Depth=1 ld a0, 240(sp) # 8-byte Folded Reload ld a0, 48(a0) + li s2, 14 + lui s8, 6 bnez a0, .LBB0_359 # %bb.314: # %if.end.i.i108.i # in Loop: Header=BB0_32 Depth=1 @@ -2500,7 +2500,7 @@ .LBB0_325: # %for.body.i121.i.preheader # in Loop: Header=BB0_32 Depth=1 li s4, 0 - ld s7, 72(sp) # 8-byte Folded Reload + ld s7, 80(sp) # 8-byte Folded Reload j .LBB0_327 .LBB0_326: # %for.inc8.i.i # in Loop: Header=BB0_327 Depth=2 @@ -2538,8 +2538,7 @@ ld a0, 16(a0) lwu a1, 0(a0) and a1, a1, a2 - li a3, 14 - bne a1, a3, .LBB0_333 + bne a1, s2, .LBB0_333 # %bb.332: # %cond.true.i.i.i.i # in Loop: Header=BB0_328 Depth=3 call vector_type_mode @@ -2558,15 +2557,14 @@ auipc a1, %got_pcrel_hi(optab_table) ld a1, %pcrel_lo(.Lpcrel_hi58)(a1) slli a0, a0, 2 - lui a3, 6 - add a1, a1, a3 + add a1, a1, s8 add a0, a1, a0 lw a0, -352(a0) - ld a1, 120(sp) # 8-byte Folded Reload + ld a1, 128(sp) # 8-byte Folded Reload beq a0, a1, .LBB0_330 # %bb.335: # %if.end.i9.i.i # in Loop: Header=BB0_328 Depth=3 - ld a0, 152(sp) # 8-byte Folded Reload + ld a0, 160(sp) # 8-byte Folded Reload ld a0, 0(a0) beqz a0, .LBB0_338 # %bb.336: # %land.lhs.true.i10.i.i @@ -2601,7 +2599,7 @@ j .LBB0_326 .LBB0_339: # %for.end10.i.i # in Loop: Header=BB0_32 Depth=1 - ld s3, 152(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload beqz s4, .LBB0_359 # %bb.340: # %for.end10.i.i # in Loop: Header=BB0_32 Depth=1 @@ -2617,10 +2615,11 @@ # in Loop: Header=BB0_32 Depth=1 mv s3, a0 lw a0, 0(a0) + ld s1, 208(sp) # 8-byte Folded Reload beqz a0, .LBB0_357 # %bb.343: # %for.body.i17.i.i.preheader # in Loop: Header=BB0_32 Depth=1 - li s1, 0 + li s8, 0 addi s2, s3, 8 j .LBB0_346 .LBB0_344: # in Loop: Header=BB0_346 Depth=2 @@ -2638,17 +2637,16 @@ mv a0, s4 call mark_virtual_ops_for_renaming lwu a0, 0(s3) - addi s1, s1, 1 - bgeu s1, a0, .LBB0_357 + addi s8, s8, 1 + bgeu s8, a0, .LBB0_357 .LBB0_346: # %for.body.i17.i.i # Parent Loop BB0_32 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_354 Depth 3 - slli a0, s1, 3 + slli a0, s8, 3 add a0, s2, a0 ld s7, 0(a0) - ld a0, 208(sp) # 8-byte Folded Reload - ld a0, 0(a0) + ld a0, 0(s1) li a1, 0 call gimple_build_call ld a1, 8(s7) @@ -2711,9 +2709,10 @@ # in Loop: Header=BB0_32 Depth=1 lui a0, 4 call update_ssa - ld s3, 152(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload .LBB0_359: # %mark_nontemporal_stores.exit.i # in Loop: Header=BB0_32 Depth=1 + li s1, -1 ld a0, 0(s10) addi a1, a0, 2047 ld a0, 0(s3) @@ -2743,8 +2742,8 @@ slli a0, s5, 32 srli a0, a0, 32 addi a0, a0, -1 - ld a1, 72(sp) # 8-byte Folded Reload - ld s4, 136(sp) # 8-byte Folded Reload + ld a1, 80(sp) # 8-byte Folded Reload + ld s4, 144(sp) # 8-byte Folded Reload j .LBB0_364 .LBB0_363: # %for.inc24.i.i # in Loop: Header=BB0_364 Depth=2 @@ -2770,7 +2769,7 @@ # Parent Loop BB0_364 Depth=2 # => This Inner Loop Header: Depth=3 ld a4, 40(a3) - bne a4, s8, .LBB0_366 + bne a4, s1, .LBB0_366 # %bb.368: # %should_issue_prefetch_p.exit.i163.i # in Loop: Header=BB0_367 Depth=3 lbu a4, 64(a3) @@ -2800,8 +2799,8 @@ # %bb.373: # %if.end28.split.i # in Loop: Header=BB0_32 Depth=1 li a1, 1 - ld s5, 72(sp) # 8-byte Folded Reload - mv a0, s5 + ld s3, 80(sp) # 8-byte Folded Reload + mv a0, s3 mv a2, s9 call issue_prefetches li s1, 0 @@ -2816,29 +2815,31 @@ mv a0, s1 mv a1, s5 call tree_unroll_loop - ld a0, 72(sp) # 8-byte Folded Reload + ld s3, 80(sp) # 8-byte Folded Reload + mv a0, s3 mv a1, s5 - mv s5, a0 mv a2, s9 call issue_prefetches li s1, 1 .LBB0_375: # %for.body.i173.preheader.i # in Loop: Header=BB0_32 Depth=1 + ld s5, 72(sp) # 8-byte Folded Reload ld s8, 216(sp) # 8-byte Folded Reload - ld s9, 80(sp) # 8-byte Folded Reload + ld s9, 88(sp) # 8-byte Folded Reload j .LBB0_377 .LBB0_376: # %for.end.i.i # in Loop: Header=BB0_377 Depth=2 - mv a0, s5 + mv a0, s7 call free - mv s5, s2 + mv s3, s2 beqz s2, .LBB0_379 .LBB0_377: # %for.body.i173.i # Parent Loop BB0_32 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_378 Depth 3 - ld a0, 16(s5) - ld s2, 24(s5) + ld a0, 16(s3) + mv s7, s3 + ld s2, 24(s3) beqz a0, .LBB0_376 .LBB0_378: # %for.body3.i176.i # Parent Loop BB0_32 Depth=1 @@ -2850,18 +2851,15 @@ bnez s3, .LBB0_378 j .LBB0_376 .LBB0_379: # in Loop: Header=BB0_32 Depth=1 - ld s3, 152(sp) # 8-byte Folded Reload - ld s2, 96(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload + ld s2, 104(sp) # 8-byte Folded Reload .LBB0_380: # %loop_prefetch_arrays.exit # in Loop: Header=BB0_32 Depth=1 - ld a0, 160(sp) # 8-byte Folded Reload - ld s5, 56(sp) # 8-byte Folded Reload - ld s7, 64(sp) # 8-byte Folded Reload + ld s7, 56(sp) # 8-byte Folded Reload .LBB0_381: # %loop_prefetch_arrays.exit # in Loop: Header=BB0_32 Depth=1 ld a3, 0(s3) - or a0, s1, a0 - sd a0, 160(sp) # 8-byte Folded Spill + or s5, s1, s5 beqz a3, .LBB0_384 # %bb.382: # %land.lhs.true61 # in Loop: Header=BB0_32 Depth=1 @@ -2955,8 +2953,7 @@ .LBB0_395: # %if.then.i7.i mv a0, s2 call free - ld a0, 160(sp) # 8-byte Folded Reload - beqz a0, .LBB0_397 + beqz s5, .LBB0_397 # %bb.396: # %if.then68 call scev_reset li s0, 32 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-predcom.s 2024-04-01 12:41:00.522417690 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-predcom.s 2024-04-01 12:41:12.586081261 +0000 @@ -5773,8 +5773,7 @@ mv a4, a3 mv a5, a1 addi a6, sp, 192 - vl1r.v v12, (a6) # Unknown-size Folded Reload - vmv1r.v v8, v12 + vl1r.v v8, (a6) # Unknown-size Folded Reload ld a7, 24(sp) # 8-byte Folded Reload li t1, 24 .LBB9_109: # %vector.body @@ -5786,9 +5785,8 @@ vsetvli zero, zero, e8, mf4, ta, ma vluxei64.v v9, (t1), v10 vmseq.vi v0, v9, 0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v9, v12, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a4, a4, t0 add a5, a5, a7 bnez a4, .LBB9_109 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/curve/editcurve.s 2024-04-01 12:40:58.926462198 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/curve/editcurve.s 2024-04-01 12:41:10.926127552 +0000 @@ -4479,8 +4479,7 @@ mul t4, s5, t4 mv t5, t3 addi t6, sp, 80 - vl2r.v v16, (t6) # Unknown-size Folded Reload - vmv2r.v v8, v16 + vl2r.v v8, (t6) # Unknown-size Folded Reload csrr t6, vlenb slli t6, t6, 1 add t6, sp, t6 @@ -4495,9 +4494,8 @@ vluxei64.v v10, (t6), v12 vand.vx v10, v10, s2 vmsne.vi v0, v10, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v10, v16, 1, v0 - vadd.vv v8, v8, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub t5, t5, s7 add t2, t2, t4 bnez t5, .LBB16_27 @@ -7870,9 +7868,8 @@ vsoxei64.v v20, (t6), v28, v0.t vluxei64.v v8, (t6), v28 vmsne.vi v0, v8, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v8, v10, 1, v0 - vadd.vv v22, v22, v8 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v22, v22, 1, v0.t sub s10, s10, a4 add s9, s9, t2 bnez s10, .LBB40_23 @@ -7939,14 +7936,13 @@ subw s7, s7, ra mul s11, ra, t1 add s11, s9, s11 + li t0, 18 + mul s10, a3, t0 vsetvli zero, zero, e64, m4, ta, ma vmul.vx v24, v12, t1 vsetvli zero, zero, e32, m2, ta, ma vmv.v.i v8, 0 - li t0, 18 - mul s10, a3, t0 mv s2, ra - vmv.v.i v22, 0 .LBB40_35: # %vector.body166 # Parent Loop BB40_7 Depth=1 # => This Inner Loop Header: Depth=2 @@ -7956,22 +7952,21 @@ vluxei64.v v21, (s4), v28 vand.vi v21, v21, 1 vmsne.vi v0, v21, 0 - vluxei64.v v7, (s5), v28, v0.t + vluxei64.v v22, (s5), v28, v0.t vmseq.vi v0, v21, 0 vmv.v.i v21, 1 vsoxei64.v v21, (s5), v28, v0.t - vmsne.vi v21, v7, 0 + vmsne.vi v21, v22, 0 vmor.mm v0, v0, v21 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v28, v8, 1, v0 - vadd.vv v22, v22, v28 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub s2, s2, a4 add s9, s9, s10 bnez s2, .LBB40_35 # %bb.36: # %middle.block154 # in Loop: Header=BB40_7 Depth=1 - vmv.s.x v8, zero - vredsum.vs v8, v22, v8 + vmv.s.x v21, zero + vredsum.vs v8, v8, v21 vmv.x.s s10, v8 beq ra, s8, .LBB40_60 .LBB40_37: # %while.body75.preheader186 @@ -8055,9 +8050,8 @@ vsoxei64.v v20, (t6), v24, v0.t vluxei64.v v8, (t6), v24 vmsne.vi v0, v8, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v24, v10, 1, v0 - vadd.vv v22, v22, v24 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v22, v22, 1, v0.t sub s10, s10, a4 add s9, s9, t2 bnez s10, .LBB40_42 @@ -8126,42 +8120,40 @@ subw s7, s7, s2 mul s11, s2, t1 add s11, s9, s11 + li t0, 18 + mul s10, a3, t0 vsetvli zero, zero, e64, m4, ta, ma vmul.vx v24, v12, t1 vsetvli zero, zero, e32, m2, ta, ma vmv.v.i v22, 0 - li t0, 18 - mul s10, a3, t0 mv ra, s2 - vmv.v.i v28, 0 .LBB40_54: # %vector.body141 # Parent Loop BB40_7 Depth=1 # => This Inner Loop Header: Depth=2 vsetvli zero, zero, e64, m4, ta, ma - vadd.vx v4, v24, s9 + vadd.vx v28, v24, s9 vsetvli zero, zero, e16, m1, ta, ma - vluxei64.v v9, (s4), v4 + vluxei64.v v9, (s4), v28 vand.vi v21, v9, 1 - vmv.v.i v30, 1 + vmv.v.i v7, 1 vmseq.vi v8, v21, 0 vmsne.vi v0, v21, 0 vand.vi v9, v9, -2 - vsoxei64.v v9, (s4), v4, v0.t - vsoxei64.v v30, (s5), v4, v0.t + vsoxei64.v v9, (s4), v28, v0.t + vsoxei64.v v7, (s5), v28, v0.t vmv.v.v v0, v8 - vluxei64.v v8, (s5), v4, v0.t + vluxei64.v v8, (s5), v28, v0.t vor.vv v8, v21, v8 vmsne.vi v0, v8, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v8, v22, 1, v0 - vadd.vv v28, v28, v8 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v22, v22, 1, v0.t sub ra, ra, a4 add s9, s9, s10 bnez ra, .LBB40_54 # %bb.55: # %middle.block129 # in Loop: Header=BB40_7 Depth=1 vmv.s.x v8, zero - vredsum.vs v8, v28, v8 + vredsum.vs v8, v22, v8 vmv.x.s s10, v8 beq s2, s8, .LBB40_60 .LBB40_56: # %while.body75.us.preheader185 @@ -11046,8 +11038,9 @@ mul s8, s10, t1 add s8, s11, s8 mul s7, a2, ra + vsetvli zero, zero, e32, m2, ta, ma + vmv.v.i v14, 0 mv ra, s10 - vmv2r.v v14, v12 .LBB52_30: # %vector.body570 # Parent Loop BB52_6 Depth=1 # => This Inner Loop Header: Depth=2 @@ -11111,9 +11104,8 @@ vand.vi v21, v25, 1 vmsne.vi v21, v21, 0 vmor.mm v0, v20, v21 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v20, v12, 1, v0 - vadd.vv v14, v14, v20 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v14, v14, 1, v0.t sub s7, s7, a3 add s8, s8, t2 bnez s7, .LBB52_35 @@ -11250,8 +11242,9 @@ mul s5, s8, t1 add s5, s9, s5 mul s6, a2, ra + vsetvli zero, zero, e32, m2, ta, ma + vmv.v.i v14, 0 mv s10, s8 - vmv2r.v v14, v12 .LBB52_60: # %vector.body498 # Parent Loop BB52_6 Depth=1 # => This Inner Loop Header: Depth=2 @@ -11314,9 +11307,8 @@ vand.vi v21, v25, 1 vmsne.vi v21, v21, 0 vmor.mm v0, v20, v21 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v20, v12, 1, v0 - vadd.vv v14, v14, v20 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v14, v14, 1, v0.t sub s6, s6, a3 add s7, s7, t2 bnez s6, .LBB52_65 @@ -14807,9 +14799,8 @@ vand.vi v21, v25, 1 vmsne.vi v21, v21, 0 vmor.mm v0, v20, v21 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v20, v12, 1, v0 - vadd.vv v14, v14, v20 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v14, v14, 1, v0.t sub s5, s5, a5 add s6, s6, t3 bnez s5, .LBB62_21 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/bmesh/intern/bmesh_operators.s 2024-04-01 12:40:58.894463091 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/bmesh/intern/bmesh_operators.s 2024-04-01 12:41:10.894128445 +0000 @@ -1213,21 +1213,19 @@ add a1, a3, a1 slli a5, a5, 1 vsetvli a6, zero, e32, m1, ta, ma - vmv.v.i v9, 0 + vmv.v.i v8, 0 li a6, 12 mv a7, a2 - vmv.v.i v8, 0 .LBB10_22: # %vector.body # =>This Inner Loop Header: Depth=1 vl2re64.v v10, (a3) vsetvli zero, zero, e8, mf4, ta, ma - vluxei64.v v12, (a6), v10 - vsetvli zero, zero, e32, m1, ta, ma - vzext.vf4 v10, v12 - vand.vx v10, v10, s4 - vmsne.vi v0, v10, 0 - vmerge.vim v10, v9, 1, v0 - vadd.vv v8, v8, v10 + vluxei64.v v9, (a6), v10 + vsetvli zero, zero, e32, m1, ta, mu + vzext.vf4 v10, v9 + vand.vx v9, v10, s4 + vmsne.vi v0, v9, 0 + vadd.vi v8, v8, 1, v0.t sub a7, a7, a4 add a3, a3, a5 bnez a7, .LBB10_22 --- build.a/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/main.s 2024-04-01 12:41:03.014348194 +0000 +++ build.b/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/main.s 2024-04-01 12:41:15.106010987 +0000 @@ -470,20 +470,19 @@ vmv.v.i v8, 0 li t2, 76 mv t3, a6 - vmv.v.i v9, 0 .LBB0_25: # %vector.body # =>This Inner Loop Header: Depth=1 vl2re64.v v10, (t0) - vluxei64.v v12, (t2), v10 - vmseq.vi v0, v12, 1 - vmerge.vim v10, v8, 1, v0 - vadd.vv v9, v9, v10 + vsetvli zero, zero, e32, m1, ta, mu + vluxei64.v v9, (t2), v10 + vmseq.vi v0, v9, 1 + vadd.vi v8, v8, 1, v0.t sub t3, t3, a7 add t0, t0, t1 bnez t3, .LBB0_25 # %bb.26: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v9, v8 + vmv.s.x v9, zero + vredsum.vs v8, v8, v9 vmv.x.s a7, v8 beq a4, a6, .LBB0_29 .LBB0_27: # %for.body.preheader @@ -1029,20 +1028,19 @@ vmv.v.i v8, 0 li t1, 8 mv t2, a6 - vmv.v.i v9, 0 .LBB0_78: # %vector.body123 # =>This Inner Loop Header: Depth=1 vl2re64.v v10, (a7) - vluxei64.v v12, (t1), v10 - vmsgt.vi v0, v12, 0 - vmerge.vim v10, v8, 1, v0 - vadd.vv v9, v9, v10 + vsetvli zero, zero, e32, m1, ta, mu + vluxei64.v v9, (t1), v10 + vmsgt.vi v0, v9, 0 + vadd.vi v8, v8, 1, v0.t sub t2, t2, a4 add a7, a7, t0 bnez t2, .LBB0_78 # %bb.79: # %middle.block114 - vmv.s.x v8, zero - vredsum.vs v8, v9, v8 + vmv.s.x v9, zero + vredsum.vs v8, v8, v9 vmv.x.s a4, v8 beq a5, a6, .LBB0_82 .LBB0_80: # %for.body313.preheader --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/mesh/editface.s 2024-04-01 12:40:58.946461641 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/mesh/editface.s 2024-04-01 12:41:10.946126995 +0000 @@ -2904,9 +2904,9 @@ .cfi_offset s10, -96 .cfi_offset s11, -104 csrr a4, vlenb - slli a4, a4, 2 + slli a4, a4, 1 sub sp, sp, a4 - .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xa0, 0x02, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 288 + 4 * vlenb + .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xa0, 0x02, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 288 + 2 * vlenb mv s0, a2 mv s5, a0 ld a0, 0(a2) @@ -3046,19 +3046,14 @@ auipc a0, %got_pcrel_hi(bmiter__elem_of_mesh_step) ld a0, %pcrel_lo(.Lpcrel_hi13)(a0) sd a0, 56(sp) # 8-byte Folded Spill - vsetvli a0, zero, e32, m1, ta, ma +.Lpcrel_hi14: + auipc a0, %pcrel_hi(mirrtopo_hash_sort) + vsetvli a1, zero, e32, m1, ta, ma vmv.v.i v8, 0 vsetvli zero, zero, e32, m1, tu, ma vmv.s.x v8, s0 - vsetvli a0, zero, e32, m2, ta, ma + vsetvli a1, zero, e32, m2, ta, ma vmv.v.i v10, 0 - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 176 - vs2r.v v10, (a0) # Unknown-size Folded Spill -.Lpcrel_hi14: - auipc a0, %pcrel_hi(mirrtopo_hash_sort) addi a0, a0, %pcrel_lo(.Lpcrel_hi14) sd a0, 72(sp) # 8-byte Folded Spill vmv1r.v v10, v8 @@ -3188,11 +3183,6 @@ ld a3, 72(sp) # 8-byte Folded Reload call qsort li s8, 1 - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 176 - vl2r.v v14, (a0) # Unknown-size Folded Reload li a0, 2 blt s7, a0, .LBB12_27 # %bb.21: # %for.body118.preheader @@ -3207,7 +3197,7 @@ ld a1, 80(sp) # 8-byte Folded Reload and a1, a1, a0 addi a0, a1, 1 - vsetvli a2, zero, e32, m2, ta, ma + vsetvli a2, zero, e32, m2, ta, mu mv a2, a1 mv a3, s3 addi a4, sp, 176 @@ -3219,8 +3209,7 @@ vl2re32.v v10, (a3) vl2re32.v v12, (a4) vmsne.vv v0, v10, v12 - vmerge.vim v10, v14, 1, v0 - vadd.vv v8, v8, v10 + vadd.vi v8, v8, 1, v0.t sub a2, a2, s11 add a3, a3, s4 bnez a2, .LBB12_23 @@ -3412,7 +3401,7 @@ ld a1, 32(sp) # 8-byte Folded Reload sw a1, 12(a0) csrr a0, vlenb - slli a0, a0, 2 + slli a0, a0, 1 add sp, sp, a0 ld ra, 280(sp) # 8-byte Folded Reload ld s0, 272(sp) # 8-byte Folded Reload --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-predcom.s 2024-04-01 12:40:59.662441673 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-predcom.s 2024-04-01 12:41:11.706105801 +0000 @@ -5773,8 +5773,7 @@ mv a4, a3 mv a5, a1 addi a6, sp, 192 - vl1r.v v12, (a6) # Unknown-size Folded Reload - vmv1r.v v8, v12 + vl1r.v v8, (a6) # Unknown-size Folded Reload ld a7, 24(sp) # 8-byte Folded Reload li t1, 24 .LBB9_109: # %vector.body @@ -5786,9 +5785,8 @@ vsetvli zero, zero, e8, mf4, ta, ma vluxei64.v v9, (t1), v10 vmseq.vi v0, v9, 0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v9, v12, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a4, a4, t0 add a5, a5, a7 bnez a4, .LBB9_109 --- build.a/External/SPEC/CINT2017speed/600.perlbench_s/CMakeFiles/600.perlbench_s.dir/root/cpu2017/benchspec/CPU/500.perlbench_r/src/doop.s 2024-04-01 12:41:00.162427729 +0000 +++ build.b/External/SPEC/CINT2017speed/600.perlbench_s/CMakeFiles/600.perlbench_s.dir/root/cpu2017/benchspec/CPU/500.perlbench_r/src/doop.s 2024-04-01 12:41:12.206091857 +0000 @@ -1360,28 +1360,26 @@ .LBB0_210: # %vector.ph neg a2, a3 and a2, a1, a2 - vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v8, 0 add a0, s2, a2 + vsetvli a4, zero, e32, m2, ta, ma + vmv.v.i v8, 0 mv a4, a2 - vmv.v.i v10, 0 .LBB0_211: # %vector.body # =>This Inner Loop Header: Depth=1 - vle8.v v12, (s2) vsetvli zero, zero, e8, mf2, ta, ma - vwaddu.vv v13, v12, v12 + vle8.v v10, (s2) + vwaddu.vv v11, v10, v10 vsetvli zero, zero, e16, m1, ta, ma - vluxei16.v v12, (s3), v13 - vmsgt.vi v0, v12, -1 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vluxei16.v v10, (s3), v11 + vmsgt.vi v0, v10, -1 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a4, a4, a3 add s2, s2, a3 bnez a4, .LBB0_211 # %bb.212: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s1, v8 beq a1, a2, .LBB0_256 .LBB0_213: # %while.body.i34 --- build.a/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/box_algebra.s 2024-04-01 12:41:02.686357341 +0000 +++ build.b/MultiSource/Benchmarks/ASCI_Purple/SMG2000/CMakeFiles/smg2000.dir/box_algebra.s 2024-04-01 12:41:14.794019687 +0000 @@ -1512,24 +1512,23 @@ slli a0, a0, 31 sub a0, a0, a2 and a0, a0, ra - vsetvli a2, zero, e32, m2, ta, ma - vmv.v.i v8, 0 slli a2, a3, 1 + vsetvli a3, zero, e32, m2, ta, ma + vmv.v.i v8, 0 mv a3, a0 mv a4, s2 - vmv.v.i v10, 0 .LBB2_199: # %vector.body586 # =>This Inner Loop Header: Depth=1 - vl2re32.v v12, (a4) - vmsne.vi v0, v12, 0 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vl2re32.v v10, (a4) + vsetvli zero, zero, e32, m2, ta, mu + vmsne.vi v0, v10, 0 + vadd.vi v8, v8, 1, v0.t sub a3, a3, a1 add a4, a4, a2 bnez a3, .LBB2_199 # %bb.200: # %middle.block578 - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a1, v8 beq a0, ra, .LBB2_203 .LBB2_201: # %for.body325.preheader591 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/cfgloop.s 2024-04-01 12:41:00.254425164 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/cfgloop.s 2024-04-01 12:41:12.306089068 +0000 @@ -4315,7 +4315,6 @@ li a5, 8 mv a6, a2 mv a7, a0 - vmv.v.i v9, 0 .LBB32_7: # %vector.body # =>This Inner Loop Header: Depth=1 vl2re64.v v10, (a7) @@ -4323,17 +4322,17 @@ vluxei64.v v10, (a5), v10 vmsne.vi v0, v10, 0 vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v12, (zero), v10, v0.t - vmsgtu.vi v10, v12, 1 - vmand.mm v0, v0, v10 - vmerge.vim v10, v8, 1, v0 - vadd.vv v9, v9, v10 + vluxei64.v v9, (zero), v10, v0.t + vmsgtu.vi v9, v9, 1 + vmand.mm v0, v0, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a6, a6, a3 add a7, a7, a4 bnez a6, .LBB32_7 # %bb.8: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v9, v8 + vmv.s.x v9, zero + vredsum.vs v8, v8, v9 vmv.x.s s0, v8 bne a2, a1, .LBB32_10 .LBB32_9: # %for.end --- build.a/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/refine.s 2024-04-01 12:41:02.810353883 +0000 +++ build.b/MultiSource/Benchmarks/DOE-ProxyApps-C/miniAMR/CMakeFiles/miniAMR.dir/refine.s 2024-04-01 12:41:14.918016230 +0000 @@ -1865,28 +1865,26 @@ addi a6, a1, 8 mv a7, a4 vmv.v.i v12, 0 - vmv.v.i v14, 0 .LBB2_206: # %vector.body # =>This Inner Loop Header: Depth=1 addi t0, a0, -64 - vlseg2e32.v v16, (t0) - vlseg2e32.v v18, (a0) - vwmul.vx v20, v16, a5 - vwmul.vx v24, v18, a5 + vsetvli zero, zero, e32, m2, ta, mu + vlseg2e32.v v14, (t0) + vlseg2e32.v v16, (a0) + vwmul.vx v20, v14, a5 + vwmul.vx v24, v16, a5 vluxei64.v v8, (a6), v20 - vluxei64.v v16, (a6), v24 + vluxei64.v v14, (a6), v24 vmseq.vi v0, v8, 1 - vmseq.vi v8, v16, 1 - vmerge.vim v16, v10, 1, v0 + vmseq.vi v8, v14, 1 + vadd.vi v10, v10, 1, v0.t vmv1r.v v0, v8 - vmerge.vim v8, v10, 1, v0 - vadd.vv v12, v12, v16 - vadd.vv v14, v14, v8 + vadd.vi v12, v12, 1, v0.t addi a7, a7, -16 addi a0, a0, 128 bnez a7, .LBB2_206 # %bb.207: # %middle.block - vadd.vv v8, v14, v12 + vadd.vv v8, v12, v10 vmv.s.x v10, zero vredsum.vs v8, v8, v10 vmv.x.s a0, v8 --- build.a/External/SPEC/CFP2017rate/511.povray_r/CMakeFiles/511.povray_r.dir/root/cpu2017/benchspec/CPU/511.povray_r/src/prism.s 2024-04-01 12:40:58.598471346 +0000 +++ build.b/External/SPEC/CFP2017rate/511.povray_r/CMakeFiles/511.povray_r.dir/root/cpu2017/benchspec/CPU/511.povray_r/src/prism.s 2024-04-01 12:41:10.586137034 +0000 @@ -3302,16 +3302,15 @@ fld fs1, %pcrel_lo(.Lpcrel_hi58)(a1) addi s8, s1, 1 vsetvli a1, zero, e32, m1, ta, ma - vmv.v.i v24, 0 + vmv.v.i v8, 0 + addi a1, sp, 80 + vs1r.v v8, (a1) # Unknown-size Folded Spill vsetvli zero, zero, e64, m2, ta, ma - vid.v v26 + vid.v v24 csrr a1, vlenb - slli a1, a1, 1 add a1, sp, a1 addi a1, a1, 80 - vs1r.v v24, (a1) # Unknown-size Folded Spill - addi a1, sp, 80 - vs2r.v v26, (a1) # Unknown-size Folded Spill + vs2r.v v24, (a1) # Unknown-size Folded Spill j .LBB13_9 .LBB13_6: li a0, 0 @@ -3377,12 +3376,9 @@ slli a2, a0, 32 srli a2, a2, 32 csrr a1, vlenb - slli a1, a1, 1 add a1, sp, a1 addi a1, a1, 80 - vl1r.v v24, (a1) # Unknown-size Folded Reload - addi a1, sp, 80 - vl2r.v v26, (a1) # Unknown-size Folded Reload + vl2r.v v24, (a1) # Unknown-size Folded Reload bgeu a2, s5, .LBB13_15 # %bb.14: # in Loop: Header=BB13_9 Depth=1 mv a1, a0 @@ -3397,7 +3393,8 @@ and a3, a3, a2 sub a1, a0, a3 vsetvli a4, zero, e32, m1, tu, ma - vmv1r.v v8, v24 + addi a4, sp, 80 + vl1r.v v8, (a4) # Unknown-size Folded Reload vmv.s.x v8, s9 vsetvli zero, zero, e64, m2, ta, ma vfmv.v.f v10, fs2 @@ -3416,7 +3413,7 @@ vl2re64.v v18, (a4) addi a6, s5, -1 vsetvli zero, zero, e64, m2, ta, ma - vrsub.vx v20, v26, a6 + vrsub.vx v20, v24, a6 vrgather.vv v22, v18, v20 vmfge.vf v9, v22, fs0 vmfle.vf v18, v22, fs1 @@ -3428,9 +3425,8 @@ vmfge.vf v19, v20, fs0 vmand.mm v9, v9, v19 vmand.mm v0, v9, v18 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v9, v24, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, s5 add a4, a4, s3 bnez a5, .LBB13_16 --- build.a/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/grid/grid_generator.s 2024-04-01 12:40:58.150483840 +0000 +++ build.b/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/grid/grid_generator.s 2024-04-01 12:41:10.186148188 +0000 @@ -18122,15 +18122,15 @@ ld a3, 72(s10) li a4, 0 li a5, 0 - li t2, 0 + li t3, 0 ld t1, 72(sp) # 8-byte Folded Reload srli a6, t1, 1 mul a7, a1, a2 add t0, a3, a2 srli t1, t1, 3 - vsetvli t3, zero, e32, m1, ta, ma + vsetvli t2, zero, e32, m1, ta, ma vmv.v.i v8, 0 - vsetvli t3, zero, e32, m2, ta, ma + vsetvli t2, zero, e32, m2, ta, ma vmv.v.i v10, 0 ld s3, 24(sp) # 8-byte Folded Reload j .LBB30_68 @@ -18144,7 +18144,7 @@ # Child Loop BB30_70 Depth 2 # Child Loop BB30_73 Depth 3 # Child Loop BB30_76 Depth 3 - li t3, 0 + li t2, 0 li t4, 0 slli t5, a4, 32 srli t6, t5, 32 @@ -18154,14 +18154,14 @@ .LBB30_69: # %invoke.cont132.for.cond.cleanup135_crit_edge.us.us.us # in Loop: Header=BB30_70 Depth=2 addi t4, t4, 1 - add t3, t3, a2 + add t2, t2, a2 beq t4, a1, .LBB30_67 .LBB30_70: # %invoke.cont132.preheader.us.us.us # Parent Loop BB30_68 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB30_73 Depth 3 # Child Loop BB30_76 Depth 3 - slli s0, t3, 32 + slli s0, t2, 32 srli s0, s0, 32 bgeu a2, a6, .LBB30_72 # %bb.71: # in Loop: Header=BB30_70 Depth=2 @@ -18174,9 +18174,9 @@ and s1, s1, a2 vsetvli zero, zero, e32, m2, tu, ma vmv1r.v v9, v8 - vmv.s.x v9, t2 + vmv.s.x v9, t3 vmv2r.v v12, v10 - add t2, t5, s0 + add t3, t5, s0 vmv1r.v v12, v9 mv s2, s1 .LBB30_73: # %vector.body581 @@ -18184,19 +18184,18 @@ # Parent Loop BB30_70 Depth=2 # => This Inner Loop Header: Depth=3 vsetvli zero, zero, e8, mf2, ta, ma - vle8.v v9, (t2) + vle8.v v9, (t3) vmsne.vi v0, v9, -1 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v14, v10, 1, v0 - vadd.vv v12, v12, v14 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v12, v12, 1, v0.t sub s2, s2, a6 - add t2, t2, a6 + add t3, t3, a6 bnez s2, .LBB30_73 # %bb.74: # %middle.block573 # in Loop: Header=BB30_70 Depth=2 vmv.s.x v9, zero vredsum.vs v9, v12, v9 - vmv.x.s t2, v9 + vmv.x.s t3, v9 beq s1, a2, .LBB30_69 .LBB30_75: # %invoke.cont140.us.us.us.preheader # in Loop: Header=BB30_70 Depth=2 @@ -18211,14 +18210,14 @@ addi s1, s1, -255 snez s1, s1 addi s0, s0, 1 - addw t2, t2, s1 + addw t3, t3, s1 bne s0, s2, .LBB30_76 j .LBB30_69 .LBB30_77: # %for.cond.cleanup120 - beqz t2, .LBB30_82 + beqz t3, .LBB30_82 # %bb.78: # %if.end.i.i.i.i.i.i.i - slli t2, t2, 32 - srli a0, t2, 32 + slli t3, t3, 32 + srli a0, t3, 32 li a1, 36 mul s4, a0, a1 .Ltmp640: --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/transform/transform_conversions.s 2024-04-01 12:40:59.034459186 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/transform/transform_conversions.s 2024-04-01 12:41:11.042124318 +0000 @@ -6876,7 +6876,13 @@ vid.v v8 vmul.vx v16, v8, a1 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v20, 0 + vmv.v.i v8, 0 + csrr a0, vlenb + slli a1, a0, 1 + add a0, a1, a0 + add a0, sp, a0 + addi a0, a0, 480 + vs2r.v v8, (a0) # Unknown-size Folded Spill li s11, 52 li s10, 12 csrr a0, vlenb @@ -6885,12 +6891,6 @@ add a0, sp, a0 addi a0, a0, 480 vs4r.v v16, (a0) # Unknown-size Folded Spill - csrr a0, vlenb - slli a1, a0, 1 - add a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 480 - vs2r.v v20, (a0) # Unknown-size Folded Spill j .LBB28_116 .LBB28_113: # in Loop: Header=BB28_116 Depth=1 li a1, 0 @@ -6935,12 +6935,6 @@ add a1, sp, a1 addi a1, a1, 480 vl4r.v v16, (a1) # Unknown-size Folded Reload - csrr a1, vlenb - slli a2, a1, 1 - add a1, a2, a1 - add a1, sp, a1 - addi a1, a1, 480 - vl2r.v v20, (a1) # Unknown-size Folded Reload beq a0, s4, .LBB28_136 # %bb.119: # %if.end41.i # in Loop: Header=BB28_116 Depth=1 @@ -7148,7 +7142,12 @@ li a2, 36 mul a2, s1, a2 mv a5, a4 - vmv2r.v v8, v20 + csrr a6, vlenb + slli a7, a6, 1 + add a6, a7, a6 + add a6, sp, a6 + addi a6, a6, 480 + vl2r.v v8, (a6) # Unknown-size Folded Reload .LBB28_162: # %vector.body626 # Parent Loop BB28_116 Depth=1 # => This Inner Loop Header: Depth=2 @@ -7162,8 +7161,8 @@ vluxei64.v v10, (s10), v12, v0.t vmfge.vf v12, v10, fa0 vmand.mm v0, v0, v12 - vmerge.vim v10, v20, 1, v0 - vadd.vv v8, v8, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, s3 add a1, a1, a2 bnez a5, .LBB28_162 @@ -7211,7 +7210,12 @@ li a2, 36 mul a2, s1, a2 mv a5, a4 - vmv2r.v v8, v20 + csrr a6, vlenb + slli a7, a6, 1 + add a6, a7, a6 + add a6, sp, a6 + addi a6, a6, 480 + vl2r.v v8, (a6) # Unknown-size Folded Reload .LBB28_169: # %vector.body604 # Parent Loop BB28_116 Depth=1 # => This Inner Loop Header: Depth=2 @@ -7262,7 +7266,12 @@ li a2, 36 mul a2, s1, a2 mv a5, a4 - vmv2r.v v8, v20 + csrr a6, vlenb + slli a7, a6, 1 + add a6, a7, a6 + add a6, sp, a6 + addi a6, a6, 480 + vl2r.v v8, (a6) # Unknown-size Folded Reload .LBB28_174: # %vector.body578 # Parent Loop BB28_116 Depth=1 # => This Inner Loop Header: Depth=2 @@ -7276,8 +7285,8 @@ vluxei64.v v10, (s10), v12, v0.t vmfle.vf v12, v10, fa0 vmand.mm v0, v0, v12 - vmerge.vim v10, v20, 1, v0 - vadd.vv v8, v8, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, s3 add a1, a1, a2 bnez a5, .LBB28_174 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/material.s 2024-04-01 12:40:58.826464987 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/material.s 2024-04-01 12:41:10.818130564 +0000 @@ -4556,37 +4556,35 @@ li a6, 352 mv a7, a1 mv t0, s2 - vmv1r.v v9, v8 .LBB39_29: # %vector.body # =>This Inner Loop Header: Depth=1 vl2re64.v v10, (t0) vsetvli zero, zero, e64, m2, ta, ma vmsne.vi v0, v10, 0 vsetvli zero, zero, e16, mf2, ta, ma - vluxei64.v v12, (zero), v10, v0.t - vmseq.vx v12, v12, a4 - vmand.mm v0, v0, v12 + vluxei64.v v9, (zero), v10, v0.t + vmseq.vx v9, v9, a4 + vmand.mm v0, v0, v9 vsetvli zero, zero, e64, m2, ta, ma vluxei64.v v10, (a4), v10, v0.t - vmsne.vi v12, v10, 0 - vmand.mm v0, v0, v12 + vmsne.vi v9, v10, 0 + vmand.mm v0, v0, v9 vsetvli zero, zero, e16, mf2, ta, ma - vluxei64.v v12, (a5), v10, v0.t - vmseq.vi v12, v12, 8 - vmand.mm v0, v0, v12 + vluxei64.v v9, (a5), v10, v0.t + vmseq.vi v9, v9, 8 + vmand.mm v0, v0, v9 vsetvli zero, zero, e64, m2, ta, ma vluxei64.v v10, (a6), v10, v0.t - vmsne.vi v12, v10, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e16, mf2, ta, ma - vmerge.vim v10, v8, 1, v0 - vadd.vv v9, v9, v10 + vmsne.vi v9, v10, 0 + vmand.mm v0, v0, v9 + vsetvli zero, zero, e16, mf2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a7, a7, a2 add t0, t0, a3 bnez a7, .LBB39_29 # %bb.30: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v9, v8 + vmv.s.x v9, zero + vredsum.vs v8, v8, v9 li a2, 18 vmv.x.s s1, v8 bne a1, a2, .LBB39_46 --- build.a/MultiSource/Benchmarks/Prolangs-C/gnugo/CMakeFiles/gnugo.dir/endgame.s 2024-04-01 12:41:02.998348640 +0000 +++ build.b/MultiSource/Benchmarks/Prolangs-C/gnugo/CMakeFiles/gnugo.dir/endgame.s 2024-04-01 12:41:15.090011433 +0000 @@ -307,229 +307,210 @@ li a5, 76 mul a4, a4, a5 vsetvli a5, zero, e32, m2, ta, ma - vmv.v.i v8, 0 + vmv.v.i v10, 0 li a5, 19 mv a6, a2 vmv.v.i v12, 0 - vmv.v.i v10, 0 .LBB0_18: # %vector.body # =>This Inner Loop Header: Depth=1 - vlse8.v v14, (s0), a5 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vsetvli zero, zero, e32, m2, ta, mu + vlse8.v v8, (s0), a5 + vzext.vf4 v14, v8 + vmsne.vx v8, v14, a0 + vmseq.vx v0, v14, a0 + vmseq.vx v9, v14, a1 addi a7, s0, 1 vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vmand.mm v8, v8, v9 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v8, v14 + vmsne.vx v14, v8, a0 + vmseq.vx v0, v8, a0 + vmseq.vx v15, v8, a1 addi a7, s0, 2 - vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vlse8.v v9, (a7), a5 + vmand.mm v8, v14, v15 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v14, v9 + vmsne.vx v8, v14, a0 + vmseq.vx v0, v14, a0 + vmseq.vx v9, v14, a1 addi a7, s0, 3 vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vmand.mm v8, v8, v9 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v8, v14 + vmsne.vx v14, v8, a0 + vmseq.vx v0, v8, a0 + vmseq.vx v15, v8, a1 addi a7, s0, 4 - vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vlse8.v v9, (a7), a5 + vmand.mm v8, v14, v15 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v14, v9 + vmsne.vx v8, v14, a0 + vmseq.vx v0, v14, a0 + vmseq.vx v9, v14, a1 addi a7, s0, 5 vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vmand.mm v8, v8, v9 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v8, v14 + vmsne.vx v14, v8, a0 + vmseq.vx v0, v8, a0 + vmseq.vx v15, v8, a1 addi a7, s0, 6 - vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vlse8.v v9, (a7), a5 + vmand.mm v8, v14, v15 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v14, v9 + vmsne.vx v8, v14, a0 + vmseq.vx v0, v14, a0 + vmseq.vx v9, v14, a1 addi a7, s0, 7 vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vmand.mm v8, v8, v9 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v8, v14 + vmsne.vx v14, v8, a0 + vmseq.vx v0, v8, a0 + vmseq.vx v15, v8, a1 addi a7, s0, 8 - vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vlse8.v v9, (a7), a5 + vmand.mm v8, v14, v15 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v14, v9 + vmsne.vx v8, v14, a0 + vmseq.vx v0, v14, a0 + vmseq.vx v9, v14, a1 addi a7, s0, 9 vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vmand.mm v8, v8, v9 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v8, v14 + vmsne.vx v14, v8, a0 + vmseq.vx v0, v8, a0 + vmseq.vx v15, v8, a1 addi a7, s0, 10 - vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vlse8.v v9, (a7), a5 + vmand.mm v8, v14, v15 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v14, v9 + vmsne.vx v8, v14, a0 + vmseq.vx v0, v14, a0 + vmseq.vx v9, v14, a1 addi a7, s0, 11 vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vmand.mm v8, v8, v9 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v8, v14 + vmsne.vx v14, v8, a0 + vmseq.vx v0, v8, a0 + vmseq.vx v15, v8, a1 addi a7, s0, 12 - vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vlse8.v v9, (a7), a5 + vmand.mm v8, v14, v15 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v14, v9 + vmsne.vx v8, v14, a0 + vmseq.vx v0, v14, a0 + vmseq.vx v9, v14, a1 addi a7, s0, 13 vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vmand.mm v8, v8, v9 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v8, v14 + vmsne.vx v14, v8, a0 + vmseq.vx v0, v8, a0 + vmseq.vx v15, v8, a1 addi a7, s0, 14 - vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vlse8.v v9, (a7), a5 + vmand.mm v8, v14, v15 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v14, v9 + vmsne.vx v8, v14, a0 + vmseq.vx v0, v14, a0 + vmseq.vx v9, v14, a1 addi a7, s0, 15 vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vmand.mm v8, v8, v9 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v8, v14 + vmsne.vx v14, v8, a0 + vmseq.vx v0, v8, a0 + vmseq.vx v15, v8, a1 addi a7, s0, 16 - vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vlse8.v v9, (a7), a5 + vmand.mm v8, v14, v15 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v14, v9 + vmsne.vx v8, v14, a0 + vmseq.vx v0, v14, a0 + vmseq.vx v9, v14, a1 addi a7, s0, 17 vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 + vmand.mm v8, v8, v9 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v8, v14 + vmsne.vx v14, v8, a0 + vmseq.vx v0, v8, a0 + vmseq.vx v15, v8, a1 addi a7, s0, 18 - vlse8.v v14, (a7), a5 - vadd.vv v10, v10, v16 - vmerge.vim v16, v8, 1, v0 - vadd.vv v12, v12, v16 - vzext.vf4 v16, v14 - vmsne.vx v14, v16, a0 - vmseq.vx v0, v16, a0 - vmseq.vx v15, v16, a1 - vmerge.vim v16, v8, 1, v0 - vmand.mm v0, v14, v15 - vadd.vv v10, v10, v16 - vmerge.vim v14, v8, 1, v0 - vadd.vv v12, v12, v14 + vlse8.v v9, (a7), a5 + vmand.mm v8, v14, v15 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t + vzext.vf4 v14, v9 + vmsne.vx v8, v14, a0 + vmseq.vx v0, v14, a0 + vmseq.vx v9, v14, a1 + vmand.mm v8, v8, v9 + vadd.vi v12, v12, 1, v0.t + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t sub a6, a6, a3 add s0, s0, a4 bnez a6, .LBB0_18 # %bb.19: # %middle.block vmv.s.x v8, zero - vredsum.vs v9, v12, v8 + vredsum.vs v9, v10, v8 vmv.x.s s1, v9 - vredsum.vs v8, v10, v8 + vredsum.vs v8, v12, v8 vmv.x.s s0, v8 .LBB0_20: # %for.cond101.preheader.preheader .Lpcrel_hi35: --- build.a/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s 2024-04-01 12:41:02.814353771 +0000 +++ build.b/MultiSource/Benchmarks/DOE-ProxyApps-C++/miniFE/CMakeFiles/miniFE.dir/main.s 2024-04-01 12:41:14.922016118 +0000 @@ -7941,94 +7941,92 @@ ld s8, 216(sp) add a2, s8, a2 sw s4, 0(a2) - slt a0, s7, s9 - xori a1, s2, 1 + slt a1, s7, s9 + addi a0, s3, 1 + xori a3, s2, 1 ld a2, 152(sp) # 8-byte Folded Reload addw a2, a2, s3 - slt a3, s9, a2 - xori a3, a3, 1 - vsetvli a4, zero, e32, m1, tu, ma - csrr a4, vlenb - slli a5, a4, 3 - add a4, a5, a4 - add a4, sp, a4 - addi a4, a4, 656 - vl1r.v v8, (a4) # Unknown-size Folded Reload + slt a4, s9, a2 + xori a4, a4, 1 + vsetvli a5, zero, e32, m1, tu, ma + csrr a5, vlenb + slli a6, a5, 3 + add a5, a6, a5 + add a5, sp, a5 + addi a5, a5, 656 + vl1r.v v8, (a5) # Unknown-size Folded Reload vmv.s.x v8, s4 - vsetvli a4, zero, e8, mf2, ta, ma - vmv.v.x v9, a3 + csrr a5, vlenb + slli a6, a5, 3 + sub a5, a6, a5 + add a5, sp, a5 + addi a5, a5, 656 + vl2r.v v10, (a5) # Unknown-size Folded Reload + vsetvli a5, zero, e8, mf2, ta, ma + vmv.v.x v9, a4 vmsne.vi v13, v9, 0 - vmv.v.x v9, a1 + vmv.v.x v9, a3 vmsne.vi v12, v9, 0 - vmv.v.x v9, a0 + vmv.v.x v9, a1 vmsne.vi v9, v9, 0 vsetvli zero, zero, e32, m2, ta, mu - csrr a0, vlenb - li a1, 20 - mul a0, a0, a1 - add a0, sp, a0 - addi a0, a0, 656 - vl2r.v v20, (a0) # Unknown-size Folded Reload - vor.vx v10, v20, a2 - vmsgt.vi v14, v10, -1 - vmand.mm v0, v14, v13 - addi a0, s3, 1 csrr a1, vlenb - slli a3, a1, 3 - sub a1, a3, a1 - add a1, sp, a1 - addi a1, a1, 656 - vl2r.v v24, (a1) # Unknown-size Folded Reload - vmv2r.v v10, v24 - csrr a1, vlenb - slli a3, a1, 2 - add a1, a3, a1 - add a1, sp, a1 - addi a1, a1, 656 - vl2r.v v26, (a1) # Unknown-size Folded Reload - vmv2r.v v14, v26 - csrr a1, vlenb - li a3, 18 + li a3, 20 mul a1, a1, a3 add a1, sp, a1 addi a1, a1, 656 - vl2r.v v22, (a1) # Unknown-size Folded Reload - vadd.vx v14, v22, a2, v0.t + vl2r.v v20, (a1) # Unknown-size Folded Reload + vor.vx v14, v20, a2 vmsgt.vi v10, v14, -1 - vmslt.vx v16, v14, s6 - vmand.mm v0, v10, v16 + vmand.mm v0, v10, v13 ld a3, 168(sp) # 8-byte Folded Reload add a1, a0, a3 vmv1r.v v10, v8 - vmerge.vim v14, v24, 1, v0 + csrr a4, vlenb + slli a5, a4, 2 + add a4, a5, a4 + add a4, sp, a4 + addi a4, a4, 656 + vl2r.v v22, (a4) # Unknown-size Folded Reload + vmv2r.v v14, v22 + csrr a4, vlenb + li a5, 18 + mul a4, a4, a5 + add a4, sp, a4 + addi a4, a4, 656 + vl2r.v v24, (a4) # Unknown-size Folded Reload + vadd.vx v14, v24, a2, v0.t + vmsgt.vi v8, v14, -1 + vmslt.vx v16, v14, s6 + vmand.mm v0, v8, v16 + vmv2r.v v14, v10 vor.vx v16, v20, s7 vmsgt.vi v8, v16, -1 - vmand.mm v0, v8, v12 - vadd.vv v14, v10, v14 - vmv2r.v v16, v26 - vadd.vx v16, v22, s7, v0.t + vmand.mm v8, v8, v12 + vadd.vi v14, v10, 1, v0.t + vmv2r.v v16, v22 + vmv1r.v v0, v8 + vadd.vx v16, v24, s7, v0.t vmsgt.vi v8, v16, -1 vmslt.vx v18, v16, s6 vmand.mm v0, v8, v18 vor.vx v16, v20, a1 vmsgt.vi v8, v16, -1 vmand.mm v8, v8, v9 - vmerge.vim v16, v24, 1, v0 - vmv2r.v v18, v26 + vadd.vi v14, v14, 1, v0.t + vmv2r.v v16, v22 vmv1r.v v0, v8 - vadd.vx v18, v22, a1, v0.t - vmsgt.vi v8, v18, -1 - vmslt.vx v20, v18, s6 - vmand.mm v0, v8, v20 - vadd.vv v14, v14, v16 - vmerge.vim v16, v24, 1, v0 - vadd.vv v14, v14, v16 + vadd.vx v16, v24, a1, v0.t + vmsgt.vi v8, v16, -1 + vmslt.vx v18, v16, s6 + vmand.mm v0, v8, v18 + vadd.vi v14, v14, 1, v0.t csrr a4, vlenb slli a4, a4, 4 add a4, sp, a4 addi a4, a4, 656 - vl2r.v v22, (a4) # Unknown-size Folded Reload - vor.vx v16, v22, a2 + vl2r.v v24, (a4) # Unknown-size Folded Reload + vor.vx v16, v24, a2 vmsgt.vi v8, v16, -1 vmand.mm v8, v8, v13 csrr a4, vlenb @@ -8038,42 +8036,40 @@ addi a4, a4, 656 vl1r.v v0, (a4) # Unknown-size Folded Reload vmerge.vvm v14, v14, v10, v0 - vmv2r.v v16, v26 + vmv2r.v v16, v22 vmv1r.v v0, v8 csrr a4, vlenb li a5, 14 mul a4, a4, a5 add a4, sp, a4 addi a4, a4, 656 - vl2r.v v28, (a4) # Unknown-size Folded Reload - vadd.vx v16, v28, a2, v0.t + vl2r.v v26, (a4) # Unknown-size Folded Reload + vadd.vx v16, v26, a2, v0.t vmsgt.vi v8, v16, -1 vmslt.vx v18, v16, s6 vmand.mm v0, v8, v18 - vor.vx v16, v22, s7 - vmsgt.vi v8, v16, -1 + vmv.v.v v16, v14 + vor.vx v18, v24, s7 + vmsgt.vi v8, v18, -1 vmand.mm v8, v8, v12 - vmerge.vim v16, v24, 1, v0 - vmv2r.v v18, v26 + vadd.vi v16, v14, 1, v0.t + vmv2r.v v18, v22 vmv1r.v v0, v8 - vadd.vx v18, v28, s7, v0.t + vadd.vx v18, v26, s7, v0.t vmsgt.vi v8, v18, -1 vmslt.vx v20, v18, s6 vmand.mm v0, v8, v20 - vor.vx v18, v22, a1 + vor.vx v18, v24, a1 vmsgt.vi v8, v18, -1 vmand.mm v8, v8, v9 - vmerge.vim v18, v24, 1, v0 - vmv2r.v v20, v26 + vadd.vi v16, v16, 1, v0.t + vmv2r.v v18, v22 vmv1r.v v0, v8 - vadd.vx v20, v28, a1, v0.t - vmsgt.vi v8, v20, -1 - vmslt.vx v22, v20, s6 - vadd.vv v16, v14, v16 - vmand.mm v0, v8, v22 - vadd.vv v16, v16, v18 - vmerge.vim v18, v24, 1, v0 - vadd.vv v16, v16, v18 + vadd.vx v18, v26, a1, v0.t + vmsgt.vi v8, v18, -1 + vmslt.vx v20, v18, s6 + vmand.mm v0, v8, v20 + vadd.vi v16, v16, 1, v0.t csrr a4, vlenb li a5, 23 mul a4, a4, a5 @@ -8086,8 +8082,8 @@ mul a4, a4, a5 add a4, sp, a4 addi a4, a4, 656 - vl2r.v v22, (a4) # Unknown-size Folded Reload - vor.vx v16, v22, a2 + vl2r.v v24, (a4) # Unknown-size Folded Reload + vor.vx v16, v24, a2 vmsgt.vi v8, v16, -1 vmand.mm v0, v8, v13 csrr a4, vlenb @@ -8099,49 +8095,47 @@ vadd.vx v16, v16, s10 mv a4, s1 vmul.vx v16, v16, s1 - vmv2r.v v18, v26 + vmv2r.v v18, v22 vadd.vx v18, v16, a2, v0.t vmsgt.vi v8, v18, -1 vmslt.vx v13, v18, s6 vmand.mm v0, v8, v13 - vor.vx v18, v22, s7 + vor.vx v18, v24, s7 vmsgt.vi v8, v18, -1 + vmv.v.v v18, v14 vmand.mm v8, v8, v12 - vmerge.vim v12, v24, 1, v0 - vmv2r.v v18, v26 + vadd.vi v18, v14, 1, v0.t + vmv2r.v v12, v22 vmv1r.v v0, v8 - vadd.vx v18, v16, s7, v0.t - vmsgt.vi v8, v18, -1 - vmslt.vx v20, v18, s6 + vadd.vx v12, v16, s7, v0.t + vmsgt.vi v8, v12, -1 + vmslt.vx v20, v12, s6 vmand.mm v0, v8, v20 - vor.vx v18, v22, a1 - vmsgt.vi v8, v18, -1 + vor.vx v12, v24, a1 + vmsgt.vi v8, v12, -1 vmand.mm v8, v8, v9 - vmerge.vim v18, v24, 1, v0 - vmv2r.v v20, v26 + vadd.vi v18, v18, 1, v0.t + vmv2r.v v12, v22 vmv1r.v v0, v8 - vadd.vx v20, v16, a1, v0.t - vadd.vv v8, v14, v12 - vmsgt.vi v12, v20, -1 - vmslt.vx v13, v20, s6 - vmand.mm v0, v12, v13 - vadd.vv v8, v8, v18 - vmerge.vim v12, v24, 1, v0 - vmv2r.v v16, v10 + vadd.vx v12, v16, a1, v0.t + vmsgt.vi v8, v12, -1 + vmslt.vx v9, v12, s6 + vmand.mm v0, v8, v9 + vadd.vi v18, v18, 1, v0.t csrr a1, vlenb li a2, 13 mul a1, a1, a2 add a1, sp, a1 addi a1, a1, 656 vl1r.v v0, (a1) # Unknown-size Folded Reload - vadd.vv v16, v8, v12, v0.t + vmerge.vvm v8, v10, v18, v0 csrr a1, vlenb li a2, 24 mul a1, a1, a2 add a1, sp, a1 addi a1, a1, 656 vl1r.v v0, (a1) # Unknown-size Folded Reload - vmerge.vvm v8, v16, v14, v0 + vmerge.vvm v8, v8, v14, v0 csrr a1, vlenb li a2, 10 mul a1, a1, a2 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/reg-stack.s 2024-04-01 12:41:00.474419028 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/reg-stack.s 2024-04-01 12:41:12.534082711 +0000 @@ -8195,10 +8195,10 @@ vle8.v v10, (a6) vzext.vf8 v16, v10 vsrl.vv v16, v12, v16 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v10, v16, 0 - vand.vi v10, v10, 1 - vadd.vv v8, v8, v10 + vand.vi v16, v16, 1 + vmsne.vi v0, v16, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, a4 add a6, a6, a4 bnez a5, .LBB14_70 --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/omega.s 2024-04-01 12:40:59.594443570 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/omega.s 2024-04-01 12:41:11.634107808 +0000 @@ -1208,14 +1208,13 @@ mv a6, a7 .LBB6_16: # %vector.ph vsetvli a7, zero, e32, m1, ta, ma - vmv.v.i v12, 0 + vmv.v.i v10, 0 vsetvli zero, zero, e32, m1, tu, ma - vmv.s.x v12, a0 + vmv.s.x v10, a0 vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v10, 0 vmv.v.i v8, 0 sub a6, a3, a6 - vmv1r.v v8, v12 + vmv1r.v v8, v10 addi a0, a4, 8 li a7, 12 mul a7, a2, a7 @@ -1223,10 +1222,10 @@ mv t1, a6 .LBB6_17: # %vector.body # =>This Inner Loop Header: Depth=1 - vlse32.v v12, (a0), t0 - vmseq.vi v0, v12, 1 - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vsetvli zero, zero, e32, m2, ta, mu + vlse32.v v10, (a0), t0 + vmseq.vi v0, v10, 1 + vadd.vi v8, v8, 1, v0.t sub t1, t1, a5 add a0, a0, a7 bnez t1, .LBB6_17 @@ -11887,19 +11886,18 @@ vmv.v.i v8, 0 li a6, 24 mv a7, a2 - vmv.v.i v10, 0 .LBB18_17: # %vector.body # =>This Inner Loop Header: Depth=1 - vlse32.v v12, (a4), a6 - vmseq.vi v0, v12, 1 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vsetvli zero, zero, e32, m2, ta, mu + vlse32.v v10, (a4), a6 + vmseq.vi v0, v10, 1 + vadd.vi v8, v8, 1, v0.t sub a7, a7, a3 add a4, a4, a5 bnez a7, .LBB18_17 # %bb.18: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a3, v8 j .LBB18_21 .LBB18_19: # %if.then42 --- build.a/External/SPEC/CINT2017rate/500.perlbench_r/CMakeFiles/500.perlbench_r.dir/root/cpu2017/benchspec/CPU/500.perlbench_r/src/doop.s 2024-04-01 12:40:59.306451601 +0000 +++ build.b/External/SPEC/CINT2017rate/500.perlbench_r/CMakeFiles/500.perlbench_r.dir/root/cpu2017/benchspec/CPU/500.perlbench_r/src/doop.s 2024-04-01 12:41:11.326116398 +0000 @@ -1360,28 +1360,26 @@ .LBB0_210: # %vector.ph neg a2, a3 and a2, a1, a2 - vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v8, 0 add a0, s2, a2 + vsetvli a4, zero, e32, m2, ta, ma + vmv.v.i v8, 0 mv a4, a2 - vmv.v.i v10, 0 .LBB0_211: # %vector.body # =>This Inner Loop Header: Depth=1 - vle8.v v12, (s2) vsetvli zero, zero, e8, mf2, ta, ma - vwaddu.vv v13, v12, v12 + vle8.v v10, (s2) + vwaddu.vv v11, v10, v10 vsetvli zero, zero, e16, m1, ta, ma - vluxei16.v v12, (s3), v13 - vmsgt.vi v0, v12, -1 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vluxei16.v v10, (s3), v11 + vmsgt.vi v0, v10, -1 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a4, a4, a3 add s2, s2, a3 bnez a4, .LBB0_211 # %bb.212: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s1, v8 beq a1, a2, .LBB0_256 .LBB0_213: # %while.body.i34 --- build.a/MultiSource/Benchmarks/MiBench/office-ispell/CMakeFiles/office-ispell.dir/tree.s 2024-04-01 12:41:02.942350202 +0000 +++ build.b/MultiSource/Benchmarks/MiBench/office-ispell/CMakeFiles/office-ispell.dir/tree.s 2024-04-01 12:41:15.054012437 +0000 @@ -1025,9 +1025,8 @@ mul a2, a2, a7 add a2, s0, a2 vsetvli t0, zero, e32, m1, ta, ma - vmv.v.i v9, 0 - vsetvli zero, zero, e32, m1, tu, ma vmv.v.i v8, 0 + vsetvli zero, zero, e32, m1, tu, ma vmv.s.x v8, s6 li t0, 6 mul a4, a4, t0 @@ -1047,16 +1046,15 @@ vadd.vx v12, v10, t2 vluxei64.v v14, (a6), v12 vand.vx v16, v14, a7 - vmseq.vx v18, v16, a7 + vmseq.vx v9, v16, a7 vand.vx v14, v14, t0 vmsne.vx v16, v14, t0 - vmand.mm v0, v18, v16 + vmand.mm v0, v9, v16 vluxei64.v v12, (t1), v12, v0.t - vmsne.vi v14, v12, 0 - vmand.mm v0, v0, v14 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v12, v9, 1, v0 - vadd.vv v8, v8, v12 + vmsne.vi v9, v12, 0 + vmand.mm v0, v0, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t add a5, a5, a3 add t2, t2, a4 bnez a5, .LBB4_18 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/sculpt_paint/sculpt_uv.s 2024-04-01 12:40:58.982460637 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/sculpt_paint/sculpt_uv.s 2024-04-01 12:41:10.986125879 +0000 @@ -565,38 +565,36 @@ sub a2, a2, a4 and a2, a2, a1 vsetvli a4, zero, e64, m4, ta, ma - vid.v v8 + vid.v v12 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v12, 0 + vmv.v.i v8, 0 vsetvli zero, zero, e64, m4, ta, ma vmv.v.x v16, a0 li a4, 24 li a5, 18 li a6, 20 mv a7, a2 - vmv2r.v v14, v12 .LBB5_20: # %vector.body74 # =>This Inner Loop Header: Depth=1 - vmv4r.v v20, v8 + vmv4r.v v20, v12 vmadd.vx v20, a4, v16 vsetvli zero, zero, e8, mf2, ta, ma - vluxei64.v v24, (a5), v20 - vmsne.vi v0, v24, 0 + vluxei64.v v10, (a5), v20 + vmsne.vi v0, v10, 0 vsetvli zero, zero, e16, m1, ta, ma - vluxei64.v v24, (a6), v20, v0.t - vmseq.vx v20, v24, s5 - vmand.mm v0, v0, v20 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v20, v12, 1, v0 - vadd.vv v14, v14, v20 + vluxei64.v v10, (a6), v20, v0.t + vmseq.vx v10, v10, s5 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma sub a7, a7, a3 - vadd.vx v8, v8, a3 + vadd.vx v12, v12, a3 bnez a7, .LBB5_20 # %bb.21: # %middle.block66 - vmv.s.x v8, zero + vmv.s.x v10, zero vsetvli zero, zero, e32, m2, ta, ma - vredsum.vs v8, v14, v8 + vredsum.vs v8, v8, v10 vmv.x.s s0, v8 bne a2, a1, .LBB5_23 .LBB5_22: @@ -636,18 +634,16 @@ li a6, 12 mul a5, a5, a6 vsetvli a6, zero, e32, m2, ta, ma - vmv.v.i v10, 0 + vmv.v.i v8, 0 li a6, 24 mv a7, a2 - vmv.v.i v8, 0 .LBB5_28: # %vector.body # =>This Inner Loop Header: Depth=1 - vlse8.v v12, (a4), a6 vsetvli zero, zero, e8, mf2, ta, ma - vmsne.vi v0, v12, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vlse8.v v10, (a4), a6 + vmsne.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a7, a7, a3 add a4, a4, a5 bnez a7, .LBB5_28 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/uvedit/uvedit_smart_stitch.s 2024-04-01 12:40:59.046458851 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/uvedit/uvedit_smart_stitch.s 2024-04-01 12:41:11.054123983 +0000 @@ -849,21 +849,19 @@ vmv.v.i v8, 0 li a6, 24 mv a7, a2 - vmv.v.i v10, 0 .LBB5_14: # %vector.body # =>This Inner Loop Header: Depth=1 - vlse8.v v12, (a4), a6 vsetvli zero, zero, e8, mf2, ta, ma - vmsne.vi v0, v12, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vlse8.v v10, (a4), a6 + vmsne.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a7, a7, a3 add a4, a4, a5 bnez a7, .LBB5_14 # %bb.15: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s2, v8 beq a2, a0, .LBB5_18 .LBB5_16: # %for.body.preheader --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/softbody.s 2024-04-01 12:40:58.850464318 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/softbody.s 2024-04-01 12:41:10.846129783 +0000 @@ -1958,27 +1958,26 @@ add a2, s1, a2 subw a1, a1, a4 li a7, 10 - vsetvli t0, zero, e64, m4, ta, ma + mul a5, a5, a7 + vsetvli a7, zero, e64, m4, ta, ma vid.v v8 - vmul.vx v8, v8, a6 + vmul.vx v12, v8, a6 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v12, 0 - mul a5, a5, a7 + vmv.v.i v8, 0 mv a6, s1 - vmv.v.i v14, 0 .LBB7_151: # %vector.body586 # =>This Inner Loop Header: Depth=1 addi a7, a6, 12 - vluxei64.v v16, (a7), v8 - vmsne.vi v0, v16, 0 - vmerge.vim v16, v12, 1, v0 - vadd.vv v14, v14, v16 + vsetvli zero, zero, e32, m2, ta, mu + vluxei64.v v10, (a7), v12 + vmsne.vi v0, v10, 0 + vadd.vi v8, v8, 1, v0.t sub a4, a4, a3 add a6, a6, a5 bnez a4, .LBB7_151 # %bb.152: # %middle.block575 - vmv.s.x v8, zero - vredsum.vs v8, v14, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s3, v8 j .LBB7_201 .LBB7_153: --- build.a/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/decode_i386.s 2024-04-01 12:41:02.910351094 +0000 +++ build.b/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/decode_i386.s 2024-04-01 12:41:15.018013441 +0000 @@ -366,7 +366,6 @@ addi a7, a7, -1 vmv.v.x v17, a7 li t4, 120 - vmv1r.v v18, v11 li t1, 4 sd s5, 56(sp) # 8-byte Folded Spill mv a7, s5 @@ -378,73 +377,72 @@ # =>This Inner Loop Header: Depth=1 vsetvli zero, zero, e64, m2, ta, ma vadd.vx v8, v12, s1 - vadd.vx v20, v14, s3 - vlse64.v v22, (s1), s5 - vlse64.v v24, (s3), s6 - vluxei64.v v26, (s7), v8 - vluxei64.v v28, (s7), v20 - vluxei64.v v30, (s8), v8 - vluxei64.v v6, (s8), v20 - vluxei64.v v4, (s9), v8 - vluxei64.v v2, (s9), v20 - vfmul.vv v22, v22, v24 - vfnmsub.vv v28, v26, v22 - vfmadd.vv v6, v30, v28 - vfnmsub.vv v2, v4, v6 - vluxei64.v v22, (s10), v8 - vluxei64.v v24, (s10), v20 - vluxei64.v v26, (s11), v8 - vluxei64.v v28, (s11), v20 - vluxei64.v v30, (ra), v8 - vluxei64.v v6, (ra), v20 - vluxei64.v v4, (t3), v8 - vluxei64.v v0, (t3), v20 - vfmadd.vv v24, v22, v2 - vfnmsub.vv v28, v26, v24 - vfmadd.vv v6, v30, v28 - vfnmsub.vv v0, v4, v6 - vluxei64.v v22, (t5), v8 - vluxei64.v v24, (t5), v20 - vluxei64.v v26, (t6), v8 - vluxei64.v v28, (t6), v20 - vluxei64.v v30, (s0), v8 - vluxei64.v v6, (s0), v20 - vluxei64.v v4, (s2), v8 - vluxei64.v v2, (s2), v20 - vfmadd.vv v24, v22, v0 - vfnmsub.vv v28, v26, v24 - vfmadd.vv v6, v30, v28 - vfnmsub.vv v2, v4, v6 - vluxei64.v v22, (a3), v8 - vluxei64.v v24, (a3), v20 - vluxei64.v v26, (a2), v8 - vluxei64.v v28, (a2), v20 - vluxei64.v v30, (t0), v8 - vluxei64.v v6, (t0), v20 + vadd.vx v18, v14, s3 + vlse64.v v20, (s1), s5 + vlse64.v v22, (s3), s6 + vluxei64.v v24, (s7), v8 + vluxei64.v v26, (s7), v18 + vluxei64.v v28, (s8), v8 + vluxei64.v v30, (s8), v18 + vluxei64.v v6, (s9), v8 + vluxei64.v v4, (s9), v18 + vfmul.vv v20, v20, v22 + vfnmsub.vv v26, v24, v20 + vfmadd.vv v30, v28, v26 + vfnmsub.vv v4, v6, v30 + vluxei64.v v20, (s10), v8 + vluxei64.v v22, (s10), v18 + vluxei64.v v24, (s11), v8 + vluxei64.v v26, (s11), v18 + vluxei64.v v28, (ra), v8 + vluxei64.v v30, (ra), v18 + vluxei64.v v6, (t3), v8 + vluxei64.v v2, (t3), v18 + vfmadd.vv v22, v20, v4 + vfnmsub.vv v26, v24, v22 + vfmadd.vv v30, v28, v26 + vfnmsub.vv v2, v6, v30 + vluxei64.v v20, (t5), v8 + vluxei64.v v22, (t5), v18 + vluxei64.v v24, (t6), v8 + vluxei64.v v26, (t6), v18 + vluxei64.v v28, (s0), v8 + vluxei64.v v30, (s0), v18 + vluxei64.v v6, (s2), v8 + vluxei64.v v4, (s2), v18 + vfmadd.vv v22, v20, v2 + vfnmsub.vv v26, v24, v22 + vfmadd.vv v30, v28, v26 + vfnmsub.vv v4, v6, v30 + vluxei64.v v20, (a3), v8 + vluxei64.v v22, (a3), v18 + vluxei64.v v24, (a2), v8 + vluxei64.v v26, (a2), v18 + vluxei64.v v28, (t0), v8 + vluxei64.v v30, (t0), v18 vluxei64.v v8, (t4), v8 - vluxei64.v v20, (t4), v20 - vfmadd.vv v24, v22, v2 - vfnmsub.vv v28, v26, v24 - vfmadd.vv v6, v30, v28 - vfnmsub.vv v20, v8, v6 - vmfgt.vf v8, v20, fa5 - vmflt.vf v10, v20, fa4 + vluxei64.v v18, (t4), v18 + vfmadd.vv v22, v20, v4 + vfnmsub.vv v26, v24, v22 + vfmadd.vv v30, v28, v26 + vfnmsub.vv v18, v8, v30 + vmfgt.vf v8, v18, fa5 + vmflt.vf v10, v18, fa4 vmor.mm v9, v8, v10 vmnor.mm v0, v8, v10 vsetvli zero, zero, e32, m1, ta, ma - vfncvt.rtz.x.f.w v19, v20 + vfncvt.rtz.x.f.w v20, v18 vsetvli zero, zero, e16, mf2, ta, ma - vnsrl.wi v19, v19, 0 + vnsrl.wi v18, v20, 0 vmandn.mm v10, v10, v8 - vsse16.v v19, (t2), t1, v0.t + vsse16.v v18, (t2), t1, v0.t vmv1r.v v0, v10 vsse16.v v16, (t2), t1, v0.t vmv1r.v v0, v8 vsse16.v v17, (t2), t1, v0.t - vsetvli zero, zero, e32, m1, ta, ma + vsetvli zero, zero, e32, m1, ta, mu vmv1r.v v0, v9 - vmerge.vim v8, v11, 1, v0 - vadd.vv v18, v18, v8 + vadd.vi v11, v11, 1, v0.t add s1, s1, a4 add s3, s3, s4 sub a7, a7, a0 @@ -452,7 +450,7 @@ bnez a7, .LBB1_21 # %bb.22: # %middle.block vmv.s.x v8, zero - vredsum.vs v8, v18, v8 + vredsum.vs v8, v11, v8 vmv.x.s a0, v8 lui t0, 8 ld a2, 56(sp) # 8-byte Folded Reload --- build.a/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/procesnet.s 2024-04-01 12:41:03.018348082 +0000 +++ build.b/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/procesnet.s 2024-04-01 12:41:15.110010875 +0000 @@ -87,9 +87,9 @@ vs1r.v v8, (a0) # Unknown-size Folded Spill vsetvli a0, zero, e32, m2, ta, ma vmv.v.i v8, 0 - # implicit-def: $x8 addi a0, sp, 128 vs2r.v v8, (a0) # Unknown-size Folded Spill + # implicit-def: $x8 sd s4, 32(sp) # 8-byte Folded Spill j .LBB0_3 .LBB0_2: # %for.end523 @@ -199,8 +199,6 @@ slli a2, a2, 48 srli a2, a2, 48 li a3, 1 - addi a4, sp, 128 - vl2r.v v14, (a4) # Unknown-size Folded Reload j .LBB0_12 .LBB0_11: # %for.inc66 # in Loop: Header=BB0_12 Depth=2 @@ -235,7 +233,8 @@ addi a7, a7, 128 vl1r.v v10, (a7) # Unknown-size Folded Reload vmv.s.x v10, s1 - vmv2r.v v8, v14 + addi a7, sp, 128 + vl2r.v v8, (a7) # Unknown-size Folded Reload addi a7, t0, 1 vmv1r.v v8, v10 addi t1, a4, 2 @@ -245,11 +244,10 @@ # Parent Loop BB0_12 Depth=2 # => This Inner Loop Header: Depth=3 vl1re16.v v10, (t1) - vsetvli t3, zero, e32, m2, ta, ma + vsetvli t3, zero, e32, m2, ta, mu vsext.vf2 v12, v10 vmsgt.vx v0, v12, a1 - vmerge.vim v10, v14, 1, v0 - vadd.vv v8, v8, v10 + vadd.vi v8, v8, 1, v0.t sub t2, t2, s6 add t1, t1, s5 bnez t2, .LBB0_15 --- build.a/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uaspect.s 2024-04-01 12:41:03.022347971 +0000 +++ build.b/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/uaspect.s 2024-04-01 12:41:15.110010875 +0000 @@ -91,14 +91,11 @@ fsqrt.d fs1, fa0 feq.d a0, fs1, fs1 csrr a1, vlenb - slli a2, a1, 1 - add a1, a2, a1 + slli a1, a1, 1 add a1, sp, a1 addi a1, a1, 128 vs1r.v v8, (a1) # Unknown-size Folded Spill - csrr a1, vlenb - add a1, sp, a1 - addi a1, a1, 128 + addi a1, sp, 128 vs1r.v v11, (a1) # Unknown-size Folded Spill bnez a0, .LBB0_4 j .LBB0_159 @@ -118,23 +115,18 @@ vsetvli zero, zero, e64, m1, ta, ma vfsub.vv v8, v8, v10 vmfge.vf v0, v8, fs2 - vsetvli zero, zero, e32, mf2, ta, ma - vmv.v.i v8, 0 - addi a0, sp, 128 - vs1r.v v8, (a0) # Unknown-size Folded Spill - vmerge.vim v8, v8, 1, v0 - vadd.vv v8, v8, v9 - vsrl.vi v9, v8, 31 + vsetvli zero, zero, e32, mf2, ta, mu + vadd.vi v9, v9, 1, v0.t + vsrl.vi v8, v9, 31 .Lpcrel_hi2: auipc a0, %got_pcrel_hi(penalty) ld a1, %pcrel_lo(.Lpcrel_hi2)(a0) .Lpcrel_hi3: auipc a0, %got_pcrel_hi(overlap) ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) - vadd.vv v8, v8, v9 + vadd.vv v8, v9, v8 vsra.vi v8, v8, 1 csrr a2, vlenb - slli a2, a2, 1 add a2, sp, a2 addi a2, a2, 128 vs1r.v v8, (a2) # Unknown-size Folded Spill @@ -183,22 +175,19 @@ ld a5, 0(a2) vsetivli zero, 1, e32, mf2, ta, ma csrr a0, vlenb - slli a1, a0, 1 - add a0, a1, a0 + slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 128 vl1r.v v8, (a0) # Unknown-size Folded Reload vmv.x.s s3, v8 csrr a0, vlenb - slli a1, a0, 1 - add a0, a1, a0 + slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 128 vl1r.v v8, (a0) # Unknown-size Folded Reload vslidedown.vi v8, v8, 1 vmv.x.s s4, v8 csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 128 vl1r.v v8, (a0) # Unknown-size Folded Reload @@ -212,15 +201,10 @@ mv a4, s6 fmv.d fa0, fs1 jalr a5 - csrr a1, vlenb - add a1, sp, a1 - addi a1, a1, 128 - vl1r.v v12, (a1) # Unknown-size Folded Reload addi a1, sp, 128 - vl1r.v v21, (a1) # Unknown-size Folded Reload + vl1r.v v12, (a1) # Unknown-size Folded Reload csrr a1, vlenb - slli a2, a1, 1 - add a1, a2, a1 + slli a1, a1, 1 add a1, sp, a1 addi a1, a1, 128 vl1r.v v20, (a1) # Unknown-size Folded Reload @@ -251,7 +235,6 @@ ld a1, 0(a1) ld a2, 0(a2) csrr a4, vlenb - slli a4, a4, 1 add a4, sp, a4 addi a4, a4, 128 vl1r.v v8, (a4) # Unknown-size Folded Reload @@ -270,8 +253,8 @@ addi a6, a3, 16 sw s9, 32(a5) sw s9, 28(a4) + vsetvli zero, zero, e32, mf2, ta, ma vle32.v v9, (a6) - addi a4, a4, 16 vsub.vv v9, v9, v20 vfwcvt.f.x.v v10, v9 vsetvli zero, zero, e64, m1, ta, ma @@ -284,11 +267,11 @@ vsetvli zero, zero, e64, m1, ta, ma vfsub.vv v9, v9, v11 vmfge.vf v0, v9, fs2 - vsetvli zero, zero, e32, mf2, ta, ma + vsetvli zero, zero, e32, mf2, ta, mu ld a3, 0(a3) - vmerge.vim v9, v21, 1, v0 - vadd.vv v10, v8, v10 - vadd.vv v9, v10, v9 + addi a4, a4, 16 + vadd.vv v9, v8, v10 + vadd.vi v9, v9, 1, v0.t vse32.v v9, (a4) bnez a3, .LBB0_6 .LBB0_7: # %for.cond92.preheader @@ -307,7 +290,6 @@ ld a1, %pcrel_lo(.Lpcrel_hi13)(a1) vsetivli zero, 2, e32, mf2, ta, ma csrr a3, vlenb - slli a3, a3, 1 add a3, sp, a3 addi a3, a3, 128 vl1r.v v8, (a3) # Unknown-size Folded Reload @@ -343,9 +325,8 @@ addi t5, t5, 4 li t6, 8 vsetvli a3, zero, e32, m1, ta, ma - vmv.v.i v9, 0 + vmv.v.i v9, 1 li s8, 12 - vmv.v.i v12, 1 li s9, 32 li ra, 28 li a7, 16 @@ -354,63 +335,61 @@ .LBB0_10: # %vector.body # =>This Inner Loop Header: Depth=1 vsetvli zero, zero, e64, m2, ta, ma - vmul.vx v14, v10, t4 - vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v13, (t5), v14 - vsetvli zero, zero, e64, m2, ta, ma - vsext.vf2 v14, v13 - vsll.vi v14, v14, 4 - vadd.vx v14, v14, s10 - vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v13, (t6), v14 - vsub.vx v13, v13, s3 - vfwcvt.f.x.v v16, v13 - vsetvli zero, zero, e64, m2, ta, ma - vfdiv.vf v16, v16, fs1 + vmul.vx v12, v10, t4 vsetvli zero, zero, e32, m1, ta, ma - vfncvt.rtz.x.f.w v13, v16 - vfwcvt.f.x.v v18, v13 + vluxei64.v v14, (t5), v12 vsetvli zero, zero, e64, m2, ta, ma - vfsub.vv v16, v16, v18 - vmfge.vf v0, v16, fs2 + vsext.vf2 v12, v14 + vsll.vi v12, v12, 4 + vadd.vx v12, v12, s10 vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v16, (s8), v14 - vmerge.vim v14, v9, 1, v0 - vadd.vx v13, v13, t0 - vadd.vv v13, v13, v14 - vsub.vx v14, v16, s4 + vluxei64.v v14, (t6), v12 + vsub.vx v14, v14, s3 vfwcvt.f.x.v v16, v14 vsetvli zero, zero, e64, m2, ta, ma - vfmul.vf v14, v16, fs1 + vfdiv.vf v14, v16, fs1 vsetvli zero, zero, e32, m1, ta, ma vfncvt.rtz.x.f.w v16, v14 vfwcvt.f.x.v v18, v16 vsetvli zero, zero, e64, m2, ta, ma vfsub.vv v14, v14, v18 vmfge.vf v0, v14, fs2 + vsetvli zero, zero, e32, m1, ta, mu + vluxei64.v v14, (s8), v12 + vadd.vx v12, v16, t0 + vadd.vi v12, v12, 1, v0.t + vsub.vx v13, v14, s4 + vfwcvt.f.x.v v14, v13 + vsetvli zero, zero, e64, m2, ta, ma + vfmul.vf v14, v14, fs1 vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v14, v9, 1, v0 - vlse32.v v15, (t2), t4 - vadd.vx v16, v16, t1 - vadd.vv v14, v16, v14 + vfncvt.rtz.x.f.w v13, v14 + vfwcvt.f.x.v v16, v13 + vsetvli zero, zero, e64, m2, ta, ma + vfsub.vv v14, v14, v16 + vmfge.vf v0, v14, fs2 + vsetvli zero, zero, e32, m1, ta, mu + vlse32.v v14, (t2), t4 + vadd.vx v13, v13, t1 + vadd.vi v13, v13, 1, v0.t vsetvli zero, zero, e64, m2, ta, ma - vsext.vf2 v16, v15 - vsll.vi v16, v16, 3 - vluxei64.v v16, (a0), v16 + vsext.vf2 v16, v14 + vsll.vi v14, v16, 3 + vluxei64.v v14, (a0), v14 vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v15, (zero), v16 + vluxei64.v v16, (zero), v14 vsetvli zero, zero, e64, m2, ta, ma - vsext.vf2 v18, v15 - vsll.vi v18, v18, 3 - vluxei64.v v18, (a1), v18 + vsext.vf2 v18, v16 + vsll.vi v16, v18, 3 + vluxei64.v v16, (a1), v16 vsetvli zero, zero, e32, m1, ta, ma - vsoxei64.v v12, (s9), v18 + vsoxei64.v v9, (s9), v16 vsetvli zero, zero, e64, m2, ta, ma - vluxei64.v v16, (t6), v16 + vluxei64.v v14, (t6), v14 vsetvli zero, zero, e32, m1, ta, ma - vsoxei64.v v12, (ra), v16 - vsoxei64.v v13, (a7), v16 - vsoxei64.v v14, (a5), v16 + vsoxei64.v v9, (ra), v14 + vsoxei64.v v12, (a7), v14 + vsoxei64.v v13, (a5), v14 vsetvli zero, zero, e64, m2, ta, ma vadd.vx v10, v10, a6 sub a4, a4, a6 @@ -430,7 +409,7 @@ mul a4, a7, a4 add a4, a4, a5 addi a4, a4, 4 - vsetivli zero, 2, e32, mf2, ta, ma + vsetivli zero, 2, e32, mf2, ta, mu .LBB0_13: # %for.body95 # =>This Inner Loop Header: Depth=1 lw a5, -4(a3) @@ -462,10 +441,9 @@ vsetvli zero, zero, e64, m1, ta, ma vfsub.vv v9, v9, v11 vmfge.vf v0, v9, fs2 - vsetvli zero, zero, e32, mf2, ta, ma - vmerge.vim v9, v21, 1, v0 - vadd.vv v10, v8, v10 - vadd.vv v9, v10, v9 + vsetvli zero, zero, e32, mf2, ta, mu + vadd.vv v9, v8, v10 + vadd.vi v9, v9, 1, v0.t addi a3, a3, 44 vse32.v v9, (a5) bne a3, a4, .LBB0_13 @@ -537,8 +515,7 @@ ld a0, 48(sp) # 8-byte Folded Reload ld a0, 0(a0) csrr a2, vlenb - slli a3, a2, 1 - add a2, a3, a2 + slli a2, a2, 1 add a2, sp, a2 addi a2, a2, 128 vl1r.v v10, (a2) # Unknown-size Folded Reload @@ -572,7 +549,6 @@ vwaddu.vv v8, v10, v10 li a0, -1 csrr a1, vlenb - slli a1, a1, 1 add a1, sp, a1 addi a1, a1, 128 vl1r.v v11, (a1) # Unknown-size Folded Reload @@ -580,32 +556,30 @@ li a1, -1 vwmaccu.vx v8, a0, v10 vwmaccu.vx v9, a1, v11 - vsetivli zero, 4, e32, m1, ta, ma - vmv.v.i v10, 0 mv a0, s11 .LBB0_22: # %for.body205 # =>This Inner Loop Header: Depth=1 addi a1, a0, 72 - vle32.v v11, (a1) + vsetivli zero, 4, e32, m1, ta, ma + vle32.v v10, (a1) addi a1, a0, 56 - vsub.vv v11, v11, v8 - vfwcvt.f.x.v v12, v11 + vsub.vv v10, v10, v8 + vfwcvt.f.x.v v12, v10 vsetvli zero, zero, e64, m2, ta, ma - vfmul.vf v14, v12, fs1 + vfmul.vf v10, v12, fs1 vsetivli zero, 2, e64, m2, tu, ma - vfdiv.vf v14, v12, fs1 + vfdiv.vf v10, v12, fs1 vsetivli zero, 4, e32, m1, ta, ma - vfncvt.rtz.x.f.w v11, v14 - vfwcvt.f.x.v v12, v11 + vfncvt.rtz.x.f.w v12, v10 + vfwcvt.f.x.v v14, v12 vsetvli zero, zero, e64, m2, ta, ma - vfsub.vv v12, v14, v12 - vmfge.vf v0, v12, fs2 - vsetvli zero, zero, e32, m1, ta, ma + vfsub.vv v10, v10, v14 + vmfge.vf v0, v10, fs2 + vsetvli zero, zero, e32, m1, ta, mu ld a0, 0(a0) - vmerge.vim v12, v10, 1, v0 - vsub.vv v11, v11, v9 - vadd.vv v11, v11, v12 - vse32.v v11, (a1) + vsub.vv v10, v12, v9 + vadd.vi v10, v10, 1, v0.t + vse32.v v10, (a1) bnez a0, .LBB0_22 .LBB0_23: # %for.cond276.preheader beqz s1, .LBB0_25 @@ -663,49 +637,44 @@ addi a4, s10, 16 slli a5, a5, 3 li a6, 8 - vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v12, 0 li a7, 16 li t0, 12 li t1, 4 .LBB0_30: # %vector.body734 # =>This Inner Loop Header: Depth=1 - vsetvli zero, zero, e64, m4, ta, ma - vsll.vi v16, v8, 4 - vadd.vx v16, v16, s10 + vsll.vi v12, v8, 4 + vadd.vx v12, v12, s10 vsetvli zero, zero, e32, m2, ta, ma - vluxei64.v v14, (a6), v16 - vsub.vx v14, v14, s3 - vfwcvt.f.x.v v20, v14 + vluxei64.v v16, (a6), v12 + vsub.vx v16, v16, s3 + vfwcvt.f.x.v v20, v16 vsetvli zero, zero, e64, m4, ta, ma - vfdiv.vf v20, v20, fs1 + vfdiv.vf v16, v20, fs1 vsetvli zero, zero, e32, m2, ta, ma - vfncvt.rtz.x.f.w v14, v20 - vfwcvt.f.x.v v24, v14 + vfncvt.rtz.x.f.w v20, v16 + vfwcvt.f.x.v v24, v20 vsetvli zero, zero, e64, m4, ta, ma - vfsub.vv v20, v20, v24 - vmfge.vf v0, v20, fs2 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v20, v12, 1, v0 - vsub.vx v14, v14, s5 - vadd.vv v14, v14, v20 - vsse32.v v14, (a4), a7 - vluxei64.v v14, (t0), v16 - vsub.vx v14, v14, s4 - vfwcvt.f.x.v v20, v14 + vfsub.vv v16, v16, v24 + vmfge.vf v0, v16, fs2 + vsetvli zero, zero, e32, m2, ta, mu + vsub.vx v16, v20, s5 + vadd.vi v16, v16, 1, v0.t + vsse32.v v16, (a4), a7 + vluxei64.v v16, (t0), v12 + vsub.vx v16, v16, s4 + vfwcvt.f.x.v v20, v16 vsetvli zero, zero, e64, m4, ta, ma - vfmul.vf v20, v20, fs1 + vfmul.vf v16, v20, fs1 vsetvli zero, zero, e32, m2, ta, ma - vfncvt.rtz.x.f.w v14, v20 - vfwcvt.f.x.v v24, v14 + vfncvt.rtz.x.f.w v20, v16 + vfwcvt.f.x.v v24, v20 vsetvli zero, zero, e64, m4, ta, ma - vfsub.vv v20, v20, v24 - vmfge.vf v0, v20, fs2 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v20, v12, 1, v0 - vsub.vx v14, v14, s6 - vadd.vv v14, v14, v20 - vsoxei64.v v14, (t1), v16 + vfsub.vv v16, v16, v24 + vmfge.vf v0, v16, fs2 + vsetvli zero, zero, e32, m2, ta, mu + vsub.vx v16, v20, s6 + vadd.vi v16, v16, 1, v0.t + vsoxei64.v v16, (t1), v12 vsetvli zero, zero, e64, m4, ta, ma vadd.vx v8, v8, a1 sub a3, a3, a1 @@ -1684,23 +1653,18 @@ j .LBB0_154 .LBB0_159: # %call.sqrt csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 128 vs1r.v v9, (a0) # Unknown-size Folded Spill call sqrt csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 128 vl1r.v v9, (a0) # Unknown-size Folded Reload - csrr a0, vlenb - add a0, sp, a0 - addi a0, a0, 128 + addi a0, sp, 128 vl1r.v v11, (a0) # Unknown-size Folded Reload csrr a0, vlenb - slli a1, a0, 1 - add a0, a1, a0 + slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 128 vl1r.v v8, (a0) # Unknown-size Folded Reload --- build.a/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/lac/constraint_matrix.s 2024-04-01 12:40:58.230481609 +0000 +++ build.b/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/lac/constraint_matrix.s 2024-04-01 12:41:10.266145957 +0000 @@ -74184,10 +74184,9 @@ .cfi_offset fs0, -112 .cfi_offset fs1, -120 csrr a1, vlenb - li a2, 10 - mul a1, a1, a2 + slli a1, a1, 3 sub sp, sp, a1 - .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x80, 0x02, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 256 + 10 * vlenb + .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x80, 0x02, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 256 + 8 * vlenb mv s0, a0 lbu a0, 136(a0) bnez a0, .LBB92_172 @@ -74757,7 +74756,6 @@ vsetvli zero, zero, e32, m1, tu, ma vmv.s.x v8, a0 vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v20, 0 vmv.v.i v10, 0 vmv1r.v v10, v8 addi a0, sp, 128 @@ -74791,12 +74789,6 @@ lui a0, 4112 addiw a0, a0, 257 sd a0, 56(sp) # 8-byte Folded Spill - csrr a0, vlenb - slli a1, a0, 3 - sub a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 128 - vs2r.v v20, (a0) # Unknown-size Folded Spill sd t3, 88(sp) # 8-byte Folded Spill sd s5, 32(sp) # 8-byte Folded Spill sd s6, 24(sp) # 8-byte Folded Spill @@ -74875,12 +74867,6 @@ mv a1, s2 call _ZSt22__final_insertion_sortIN9__gnu_cxx17__normal_iteratorIPSt4pairIjdESt6vectorIS3_SaIS3_EEEENS0_5__ops15_Iter_less_iterEEvT_SB_T0_ ld t3, 88(sp) # 8-byte Folded Reload - csrr a0, vlenb - slli a1, a0, 3 - sub a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 128 - vl2r.v v20, (a0) # Unknown-size Folded Reload ld a1, 16(s4) ld s1, 8(s4) sub a0, a1, s1 @@ -75185,12 +75171,6 @@ ld s1, 8(s4) sub a0, a1, s1 srai a0, a0, 4 - csrr a2, vlenb - slli a3, a2, 3 - sub a2, a3, a2 - add a2, sp, a2 - addi a2, a2, 128 - vl2r.v v20, (a2) # Unknown-size Folded Reload mv t2, s8 ld t3, 88(sp) # 8-byte Folded Reload beq a1, s1, .LBB92_89 @@ -75366,15 +75346,14 @@ .LBB92_163: # %vector.body512 # Parent Loop BB92_90 Depth=1 # => This Inner Loop Header: Depth=2 - vlse32.v v12, (a5), s7 vsetvli zero, zero, e64, m4, ta, ma + vlse32.v v12, (a5), s7 vzext.vf2 v16, v8 vsll.vi v16, v16, 4 - vsetvli zero, zero, e32, m2, ta, ma + vsetvli zero, zero, e32, m2, ta, mu vluxei64.v v14, (s1), v16 vmseq.vv v0, v12, v14 - vmerge.vim v12, v20, 1, v0 - vadd.vv v10, v10, v12 + vadd.vi v10, v10, 1, v0.t vadd.vx v8, v8, a1 sub t0, t0, t2 add a5, a5, t3 @@ -75432,8 +75411,7 @@ sb a0, 136(s0) .LBB92_172: # %return csrr a0, vlenb - li a1, 10 - mul a0, a0, a1 + slli a0, a0, 3 add sp, sp, a0 ld ra, 248(sp) # 8-byte Folded Reload ld s0, 240(sp) # 8-byte Folded Reload --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/modulo-sched.s 2024-04-01 12:40:59.590443681 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/modulo-sched.s 2024-04-01 12:41:11.630107920 +0000 @@ -907,13 +907,7 @@ lui a1, 524288 addiw a1, a1, -1 sd a1, 88(sp) # 8-byte Folded Spill - vsetivli zero, 2, e32, mf2, ta, ma - vmv.v.i v8, 0 - csrr a1, vlenb - add a1, sp, a1 - addi a1, a1, 432 - vs1r.v v8, (a1) # Unknown-size Folded Spill - vsetvli zero, zero, e64, m1, ta, ma + vsetivli zero, 2, e64, m1, ta, ma vmv.v.i v8, 0 addi a1, sp, 432 vs1r.v v8, (a1) # Unknown-size Folded Spill @@ -3350,13 +3344,8 @@ vmv.v.x v8, s0 vmsle.vv v0, v8, v10 ld a0, 8(s8) - csrr a1, vlenb - add a1, sp, a1 - addi a1, a1, 432 - vl1r.v v8, (a1) # Unknown-size Folded Reload - vmerge.vim v8, v8, 1, v0 - vadd.vv v8, v11, v8 - vse32.v v8, (s10) + vadd.vi v11, v11, 1, v0.t + vse32.v v11, (s10) call free lw a0, 16(s8) sd s4, 8(s8) --- build.a/MultiSource/Benchmarks/Prolangs-C/gnugo/CMakeFiles/gnugo.dir/suicide.s 2024-04-01 12:41:03.002348529 +0000 +++ build.b/MultiSource/Benchmarks/Prolangs-C/gnugo/CMakeFiles/gnugo.dir/suicide.s 2024-04-01 12:41:15.090011433 +0000 @@ -70,247 +70,228 @@ .Lpcrel_hi4: auipc a4, %got_pcrel_hi(l) ld a4, %pcrel_lo(.Lpcrel_hi4)(a4) - vsetvli a7, zero, e32, m2, ta, ma - vmv.v.i v8, 0 mul a5, a5, a6 + vsetvli a6, zero, e32, m2, ta, ma + vmv.v.i v8, 0 mv a6, a1 - vmv.v.i v10, 0 .LBB0_4: # %vector.body # =>This Inner Loop Header: Depth=1 add t0, s3, a2 - vlse8.v v12, (t0), s4 - add a7, a4, a2 - vlse8.v v13, (a7), s4 - vzext.vf4 v14, v12 - vmseq.vx v12, v14, a0 - vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v13, v13, 0 - vmand.mm v0, v12, v13 vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t0), s4 + add a7, a4, a2 + vlse8.v v11, (a7), s4 + vzext.vf4 v12, v10 + vmseq.vx v10, v12, a0 + vsetvli zero, zero, e8, mf2, ta, ma + vmseq.vi v11, v11, 0 + vmand.mm v0, v10, v11 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 1 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 1 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 2 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 2 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 3 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 3 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 4 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 4 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 5 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 5 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 6 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 6 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 7 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 7 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 8 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 8 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 9 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 9 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 10 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 10 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 11 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 11 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 12 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 12 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 13 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 13 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 14 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 14 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 15 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 15 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 16 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 16 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t1, t0, 17 - vlse8.v v12, (t1), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t1), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi t1, a7, 17 - vlse8.v v12, (t1), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma + vlse8.v v10, (t1), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu addi t0, t0, 18 - vlse8.v v12, (t0), s4 - vmerge.vim v14, v8, 1, v0 - vzext.vf4 v16, v12 - vmseq.vx v0, v16, a0 + vlse8.v v10, (t0), s4 + vadd.vi v8, v8, 1, v0.t + vzext.vf4 v12, v10 + vmseq.vx v0, v12, a0 addi a7, a7, 18 - vlse8.v v12, (a7), s4, v0.t - vadd.vv v10, v10, v14 vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v12, v12, 0 - vmand.mm v0, v0, v12 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vlse8.v v10, (a7), s4, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a6, a6, a3 add a2, a2, a5 bnez a6, .LBB0_4 # %bb.5: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a2, v8 .LBB0_6: # %for.cond5.preheader.preheader .Lpcrel_hi5: --- build.a/MultiSource/Benchmarks/Prolangs-C/unix-tbl/CMakeFiles/unix-tbl.dir/t7.s 2024-04-01 12:41:03.026347860 +0000 +++ build.b/MultiSource/Benchmarks/Prolangs-C/unix-tbl/CMakeFiles/unix-tbl.dir/t7.s 2024-04-01 12:41:15.118010652 +0000 @@ -70,30 +70,29 @@ vmv.v.i v9, 0 slli a6, a2, 1 mv a7, a1 - vmv.v.i v11, 0 vmv.v.i v10, 0 .LBB0_8: # %vector.body # =>This Inner Loop Header: Depth=1 vl1re32.v v8, (a4) - vmseq.vi v0, v8, 0 + vsetvli zero, zero, e32, m1, ta, ma + vmseq.vi v8, v8, 0 + vmv.v.v v0, v8 vle64.v v12, (a5), v0.t - vmsne.vi v8, v8, 0 vsetvli zero, zero, e64, m2, ta, ma - vmseq.vi v14, v12, 0 - vmand.mm v0, v0, v14 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v12, v9, 1, v0 - vadd.vv v10, v10, v12 - vmv1r.v v0, v8 - vmerge.vim v8, v9, 1, v0 - vadd.vv v11, v11, v8 + vmseq.vi v11, v12, 0 + vmand.mm v0, v8, v11 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v10, v10, 1, v0.t + vadd.vi v11, v9, 1 + vmv.v.v v0, v8 + vmerge.vvm v9, v11, v9, v0 add a4, a4, a2 sub a7, a7, a3 add a5, a5, a6 bnez a7, .LBB0_8 # %bb.9: # %middle.block vmv.s.x v8, zero - vredsum.vs v9, v11, v8 + vredsum.vs v9, v9, v8 vmv.x.s a3, v9 vredsum.vs v8, v10, v8 vmv.x.s a2, v8 @@ -303,30 +302,29 @@ vmv.v.i v9, 0 slli a6, a2, 1 mv a7, a1 - vmv.v.i v11, 0 vmv.v.i v10, 0 .LBB1_5: # %vector.body # =>This Inner Loop Header: Depth=1 vl1re32.v v8, (a4) - vmseq.vi v0, v8, 0 + vsetvli zero, zero, e32, m1, ta, ma + vmseq.vi v8, v8, 0 + vmv.v.v v0, v8 vle64.v v12, (a5), v0.t - vmsne.vi v8, v8, 0 vsetvli zero, zero, e64, m2, ta, ma - vmseq.vi v14, v12, 0 - vmand.mm v0, v0, v14 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v12, v9, 1, v0 - vadd.vv v10, v10, v12 - vmv1r.v v0, v8 - vmerge.vim v8, v9, 1, v0 - vadd.vv v11, v11, v8 + vmseq.vi v11, v12, 0 + vmand.mm v0, v8, v11 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v10, v10, 1, v0.t + vadd.vi v11, v9, 1 + vmv.v.v v0, v8 + vmerge.vvm v9, v11, v9, v0 add a4, a4, a2 sub a7, a7, a3 add a5, a5, a6 bnez a7, .LBB1_5 # %bb.6: # %middle.block vmv.s.x v8, zero - vredsum.vs v9, v11, v8 + vredsum.vs v9, v9, v8 vmv.x.s a3, v9 vredsum.vs v8, v10, v8 vmv.x.s a2, v8 --- build.a/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/csr_matrix.s 2024-04-01 12:41:02.698357006 +0000 +++ build.b/MultiSource/Benchmarks/ASC_Sequoia/AMGmk/CMakeFiles/AMGmk.dir/csr_matrix.s 2024-04-01 12:41:14.802019464 +0000 @@ -215,24 +215,22 @@ vmv.v.i v10, 0 addi a6, a2, -1 mv a7, a1 - vmv.v.i v12, 0 .LBB4_5: # %vector.body # =>This Inner Loop Header: Depth=1 - vmv2r.v v14, v8 + vmv2r.v v12, v8 vl2re32.v v8, (a4) vsetivli zero, 1, e32, m2, ta, ma - vslidedown.vx v14, v14, a6 - vsetvli t0, zero, e32, m2, ta, ma - vslideup.vi v14, v8, 1 - vmslt.vv v0, v14, v8 - vmerge.vim v14, v10, 1, v0 - vadd.vv v12, v12, v14 + vslidedown.vx v12, v12, a6 + vsetvli t0, zero, e32, m2, ta, mu + vslideup.vi v12, v8, 1 + vmslt.vv v0, v12, v8 + vadd.vi v10, v10, 1, v0.t sub a7, a7, a2 add a4, a4, a5 bnez a7, .LBB4_5 # %bb.6: # %middle.block - vmv.s.x v10, zero - vredsum.vs v10, v12, v10 + vmv.s.x v12, zero + vredsum.vs v10, v10, v12 vmv.x.s a2, v10 beq a1, s0, .LBB4_10 # %bb.7: @@ -940,24 +938,22 @@ vmv.v.i v10, 0 addi a6, a0, -1 mv a7, a3 - vmv.v.i v12, 0 .LBB8_29: # %vector.body61 # =>This Inner Loop Header: Depth=1 - vmv2r.v v14, v8 + vmv2r.v v12, v8 vl2re32.v v8, (a5) vsetivli zero, 1, e32, m2, ta, ma - vslidedown.vx v14, v14, a6 - vsetvli t0, zero, e32, m2, ta, ma - vslideup.vi v14, v8, 1 - vmslt.vv v0, v14, v8 - vmerge.vim v14, v10, 1, v0 - vadd.vv v12, v12, v14 + vslidedown.vx v12, v12, a6 + vsetvli t0, zero, e32, m2, ta, mu + vslideup.vi v12, v8, 1 + vmslt.vv v0, v12, v8 + vadd.vi v10, v10, 1, v0.t sub a7, a7, a0 add a5, a5, a1 bnez a7, .LBB8_29 # %bb.30: # %middle.block53 - vmv.s.x v10, zero - vredsum.vs v10, v12, v10 + vmv.s.x v12, zero + vredsum.vs v10, v10, v12 vmv.x.s a0, v10 beq a3, a2, .LBB8_34 # %bb.31: --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/reg-stack.s 2024-04-01 12:40:59.614443012 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/reg-stack.s 2024-04-01 12:41:11.654107251 +0000 @@ -8195,10 +8195,10 @@ vle8.v v10, (a6) vzext.vf8 v16, v10 vsrl.vv v16, v12, v16 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v10, v16, 0 - vand.vi v10, v10, 1 - vadd.vv v8, v8, v10 + vand.vi v16, v16, 1 + vmsne.vi v0, v16, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, a4 add a6, a6, a4 bnez a5, .LBB14_70 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/particle_system.s 2024-04-01 12:40:58.838464652 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/particle_system.s 2024-04-01 12:41:10.834130118 +0000 @@ -14931,30 +14931,28 @@ mul a1, a2, a6 add a1, a3, a1 li a7, 100 - vsetvli t0, zero, e64, m4, ta, ma + mul a5, a5, a7 + vsetvli a7, zero, e64, m4, ta, ma vid.v v8 - vmul.vx v8, v8, a6 + vmul.vx v12, v8, a6 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v14, 0 - mul a5, a5, a7 + vmv.v.i v8, 0 mv a6, a2 - vmv.v.i v12, 0 .LBB37_10: # %vector.body # =>This Inner Loop Header: Depth=1 addi a7, a3, 196 vsetvli zero, zero, e16, m1, ta, ma - vluxei64.v v16, (a7), v8 - vand.vi v16, v16, 3 - vmseq.vi v0, v16, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v16, v14, 1, v0 - vadd.vv v12, v12, v16 + vluxei64.v v10, (a7), v12 + vand.vi v10, v10, 3 + vmseq.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a6, a6, a4 add a3, a3, a5 bnez a6, .LBB37_10 # %bb.11: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v12, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s1, v8 beq a2, a0, .LBB37_14 .LBB37_12: # %for.body.preheader37 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/haifa-sched.s 2024-04-01 12:41:00.342422710 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/haifa-sched.s 2024-04-01 12:41:12.394086614 +0000 @@ -2994,18 +2994,16 @@ sub a2, a2, a4 and a2, a2, s8 vsetvli a4, zero, e32, m2, ta, ma - vmv.v.i v10, 0 + vmv.v.i v8, 0 mv a4, a2 mv a5, a1 - vmv.v.i v8, 0 .LBB23_26: # %vector.body108 # =>This Inner Loop Header: Depth=1 - vle8.v v12, (a5) vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v0, v12, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vle8.v v10, (a5) + vmseq.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a4, a4, a3 add a5, a5, a3 bnez a4, .LBB23_26 --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-loop-distribution.s 2024-04-01 12:40:59.658441785 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-loop-distribution.s 2024-04-01 12:41:11.702105912 +0000 @@ -220,6 +220,8 @@ sd a1, 24(sp) # 8-byte Folded Spill vsetvli a1, zero, e32, m1, ta, ma vmv.v.i v8, 0 + addi a1, sp, 336 + vs1r.v v8, (a1) # Unknown-size Folded Spill lui a1, 16 addiw a1, a1, -1 sd a1, 120(sp) # 8-byte Folded Spill @@ -227,12 +229,10 @@ addiw a1, a1, -993 sd a1, 152(sp) # 8-byte Folded Spill vsetivli zero, 2, e64, m1, ta, ma - vmv.v.i v9, 0 + vmv.v.i v8, 0 csrr a1, vlenb add a1, sp, a1 addi a1, a1, 336 - vs1r.v v9, (a1) # Unknown-size Folded Spill - addi a1, sp, 336 vs1r.v v8, (a1) # Unknown-size Folded Spill sd a0, 96(sp) # 8-byte Folded Spill .LBB2_17: # %for.body @@ -708,8 +708,6 @@ # in Loop: Header=BB2_17 Depth=1 ld a2, 8(s0) addi a2, a2, 24 - addi a0, sp, 336 - vl1r.v v12, (a0) # Unknown-size Folded Reload li a6, 32 li a7, 9 bgeu s11, a1, .LBB2_86 @@ -726,10 +724,11 @@ .LBB2_79: # %vector.ph # in Loop: Header=BB2_17 Depth=1 sub a3, a1, a0 - vsetvli a0, zero, e32, m1, ta, ma + vsetvli a0, zero, e32, m1, ta, mu mv a0, a2 mv a4, a3 - vmv1r.v v8, v12 + addi t0, sp, 336 + vl1r.v v8, (t0) # Unknown-size Folded Reload .LBB2_80: # %vector.body # Parent Loop BB2_17 Depth=1 # => This Inner Loop Header: Depth=2 @@ -737,15 +736,13 @@ vsetvli zero, zero, e8, mf4, ta, ma vluxei64.v v9, (a5), v10 vmsne.vi v0, v9, 0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v9, v12, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t vsetvli zero, zero, e8, mf4, ta, ma vluxei64.v v9, (a7), v10 vmsne.vi v0, v9, 0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v9, v12, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a4, a4, s11 add a0, a0, s9 bnez a4, .LBB2_80 --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ira-emit.s 2024-04-01 12:40:59.574444128 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ira-emit.s 2024-04-01 12:41:11.614108366 +0000 @@ -2734,7 +2734,7 @@ auipc a1, %pcrel_hi(hard_regno_last_set_check) addi a1, a1, %pcrel_lo(.Lpcrel_hi80) sd a1, 64(sp) # 8-byte Folded Spill - beqz s0, .LBB5_34 + beqz s0, .LBB5_33 # %bb.1: # %for.body.preheader vsetvli a1, zero, e32, m1, ta, ma vmv.v.x v8, a0 @@ -2822,54 +2822,51 @@ j .LBB5_2 .LBB5_12: # %for.body24.preheader vsetvli zero, zero, e32, m1, ta, ma - vmv.v.i v12, 0 + vmv.v.i v8, 0 + addi a0, sp, 128 + vs1r.v v8, (a0) # Unknown-size Folded Spill li s9, 8 li s10, 4 mv s11, s0 - addi a0, sp, 128 - vs1r.v v12, (a0) # Unknown-size Folded Spill - j .LBB5_16 -.LBB5_13: # in Loop: Header=BB5_16 Depth=1 - addi a1, sp, 128 - vl1r.v v12, (a1) # Unknown-size Folded Reload -.LBB5_14: # %for.end89 - # in Loop: Header=BB5_16 Depth=1 + j .LBB5_15 +.LBB5_13: # %for.end89 + # in Loop: Header=BB5_15 Depth=1 sw a0, 28(s11) -.LBB5_15: # %for.inc91 - # in Loop: Header=BB5_16 Depth=1 +.LBB5_14: # %for.inc91 + # in Loop: Header=BB5_15 Depth=1 ld s11, 16(s11) - beqz s11, .LBB5_34 -.LBB5_16: # %for.body24 + beqz s11, .LBB5_33 +.LBB5_15: # %for.body24 # =>This Loop Header: Depth=1 - # Child Loop BB5_22 Depth 2 - # Child Loop BB5_32 Depth 2 - # Child Loop BB5_27 Depth 2 + # Child Loop BB5_21 Depth 2 + # Child Loop BB5_31 Depth 2 + # Child Loop BB5_26 Depth 2 ld s8, 0(s11) lw s6, 12(s8) - bltz s6, .LBB5_15 -# %bb.17: # %if.then30 - # in Loop: Header=BB5_16 Depth=1 + bltz s6, .LBB5_14 +# %bb.16: # %if.then30 + # in Loop: Header=BB5_15 Depth=1 lwu a0, 8(s8) li a1, 87 mul a1, s6, a1 add a0, s3, a0 add a0, a0, a1 lbu s7, 0(a0) - beqz s7, .LBB5_20 -# %bb.18: # %for.body40.lr.ph - # in Loop: Header=BB5_16 Depth=1 + beqz s7, .LBB5_19 +# %bb.17: # %for.body40.lr.ph + # in Loop: Header=BB5_15 Depth=1 lw a0, %pcrel_lo(.Lpcrel_hi78)(s4) addi a1, s8, 4 - bgeu s7, s2, .LBB5_21 -# %bb.19: # in Loop: Header=BB5_16 Depth=1 + bgeu s7, s2, .LBB5_20 +# %bb.18: # in Loop: Header=BB5_15 Depth=1 li a2, 0 li a3, 0 - j .LBB5_30 -.LBB5_20: # in Loop: Header=BB5_16 Depth=1 + j .LBB5_29 +.LBB5_19: # in Loop: Header=BB5_15 Depth=1 li a3, 0 - j .LBB5_24 -.LBB5_21: # %vector.ph190 - # in Loop: Header=BB5_16 Depth=1 + j .LBB5_23 +.LBB5_20: # %vector.ph190 + # in Loop: Header=BB5_15 Depth=1 ld a2, 112(sp) # 8-byte Folded Reload li a3, 254 mul a2, a2, a3 @@ -2880,43 +2877,44 @@ slli a4, s6, 2 ld a5, 64(sp) # 8-byte Folded Reload add a4, a5, a4 - vsetvli a5, zero, e32, m1, ta, ma + vsetvli a5, zero, e32, m1, ta, mu mv a5, a2 - vmv1r.v v8, v12 -.LBB5_22: # %vector.body195 - # Parent Loop BB5_16 Depth=1 + addi a6, sp, 128 + vl1r.v v8, (a6) # Unknown-size Folded Reload +.LBB5_21: # %vector.body195 + # Parent Loop BB5_15 Depth=1 # => This Inner Loop Header: Depth=2 vl1re32.v v9, (a4) vmseq.vx v0, v9, a0 - vle64.v v10, (a3), v0.t vsetvli zero, zero, e64, m2, ta, ma + vle64.v v10, (a3), v0.t vluxei64.v v10, (s9), v10, v0.t vsetvli zero, zero, e32, m1, ta, ma vluxei64.v v9, (s10), v10, v0.t vlse32.v v10, (a1), zero, v0.t vmsne.vv v9, v9, v10 vmand.mm v0, v0, v9 - vmerge.vim v9, v12, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t add a3, a3, s1 sub a5, a5, s2 add a4, a4, s5 - bnez a5, .LBB5_22 -# %bb.23: # %middle.block187 - # in Loop: Header=BB5_16 Depth=1 + bnez a5, .LBB5_21 +# %bb.22: # %middle.block187 + # in Loop: Header=BB5_15 Depth=1 vmv.s.x v9, zero vredsum.vs v8, v8, v9 vmv.x.s a3, v8 - bne a2, s7, .LBB5_30 -.LBB5_24: # %for.end58 - # in Loop: Header=BB5_16 Depth=1 + bne a2, s7, .LBB5_29 +.LBB5_23: # %for.end58 + # in Loop: Header=BB5_15 Depth=1 slli a0, a3, 3 call ira_allocate sd a0, 32(s11) li a0, 0 beqz s7, .LBB5_13 -# %bb.25: # %for.body63.lr.ph - # in Loop: Header=BB5_16 Depth=1 +# %bb.24: # %for.body63.lr.ph + # in Loop: Header=BB5_15 Depth=1 lw a1, %pcrel_lo(.Lpcrel_hi78)(s4) slli a2, s6, 3 ld a3, 72(sp) # 8-byte Folded Reload @@ -2927,36 +2925,34 @@ ld a5, 64(sp) # 8-byte Folded Reload add a3, a5, a3 add a4, a5, a4 - addi a5, sp, 128 - vl1r.v v12, (a5) # Unknown-size Folded Reload - j .LBB5_27 -.LBB5_26: # %for.inc87 - # in Loop: Header=BB5_27 Depth=2 + j .LBB5_26 +.LBB5_25: # %for.inc87 + # in Loop: Header=BB5_26 Depth=2 addi a3, a3, 4 addi a2, a2, 8 - beq a3, a4, .LBB5_14 -.LBB5_27: # %for.body63 - # Parent Loop BB5_16 Depth=1 + beq a3, a4, .LBB5_13 +.LBB5_26: # %for.body63 + # Parent Loop BB5_15 Depth=1 # => This Inner Loop Header: Depth=2 lw a5, 0(a3) - bne a5, a1, .LBB5_26 -# %bb.28: # %land.lhs.true69 - # in Loop: Header=BB5_27 Depth=2 + bne a5, a1, .LBB5_25 +# %bb.27: # %land.lhs.true69 + # in Loop: Header=BB5_26 Depth=2 ld a5, 0(a2) ld a6, 8(a5) lw a6, 4(a6) lw a7, 4(s8) - beq a6, a7, .LBB5_26 -# %bb.29: # %if.then78 - # in Loop: Header=BB5_27 Depth=2 + beq a6, a7, .LBB5_25 +# %bb.28: # %if.then78 + # in Loop: Header=BB5_26 Depth=2 ld a6, 32(s11) slli a7, a0, 3 addiw a0, a0, 1 add a6, a6, a7 sd a5, 0(a6) - j .LBB5_26 -.LBB5_30: # %for.body40.preheader - # in Loop: Header=BB5_16 Depth=1 + j .LBB5_25 +.LBB5_29: # %for.body40.preheader + # in Loop: Header=BB5_15 Depth=1 add a4, a2, s6 slli a2, a4, 3 ld a5, 72(sp) # 8-byte Folded Reload @@ -2967,19 +2963,19 @@ add a5, s7, s6 slli a5, a5, 2 add a5, a6, a5 - j .LBB5_32 -.LBB5_31: # %for.inc56 - # in Loop: Header=BB5_32 Depth=2 + j .LBB5_31 +.LBB5_30: # %for.inc56 + # in Loop: Header=BB5_31 Depth=2 addi a4, a4, 4 addi a2, a2, 8 - beq a4, a5, .LBB5_24 -.LBB5_32: # %for.body40 - # Parent Loop BB5_16 Depth=1 + beq a4, a5, .LBB5_23 +.LBB5_31: # %for.body40 + # Parent Loop BB5_15 Depth=1 # => This Inner Loop Header: Depth=2 lw a6, 0(a4) - bne a6, a0, .LBB5_31 -# %bb.33: # %land.lhs.true - # in Loop: Header=BB5_32 Depth=2 + bne a6, a0, .LBB5_30 +# %bb.32: # %land.lhs.true + # in Loop: Header=BB5_31 Depth=2 ld a6, 0(a2) ld a6, 8(a6) lw a6, 4(a6) @@ -2987,52 +2983,52 @@ xor a6, a6, a7 snez a6, a6 addw a3, a3, a6 - j .LBB5_31 -.LBB5_34: # %for.end93 + j .LBB5_30 +.LBB5_33: # %for.end93 .Lpcrel_hi82: auipc a0, %pcrel_hi(move_vec) sd a0, 112(sp) # 8-byte Folded Spill ld a0, %pcrel_lo(.Lpcrel_hi82)(a0) - beqz a0, .LBB5_36 -# %bb.35: # %if.then.i + beqz a0, .LBB5_35 +# %bb.34: # %if.then.i sw zero, 0(a0) -.LBB5_36: # %VEC_move_t_base_truncate.exit - beqz s0, .LBB5_38 -.LBB5_37: # %for.body97 +.LBB5_35: # %VEC_move_t_base_truncate.exit + beqz s0, .LBB5_37 +.LBB5_36: # %for.body97 # =>This Inner Loop Header: Depth=1 mv a0, s0 call traverse_moves ld s0, 16(s0) - bnez s0, .LBB5_37 -.LBB5_38: # %for.end100 + bnez s0, .LBB5_36 +.LBB5_37: # %for.end100 ld a0, 112(sp) # 8-byte Folded Reload ld a0, %pcrel_lo(.Lpcrel_hi82)(a0) - beqz a0, .LBB5_44 -# %bb.39: # %VEC_move_t_base_length.exit + beqz a0, .LBB5_43 +# %bb.38: # %VEC_move_t_base_length.exit lw a1, 0(a0) - blez a1, .LBB5_44 -# %bb.40: # %for.body111.lr.ph + blez a1, .LBB5_43 +# %bb.39: # %for.body111.lr.ph li a2, 0 slli a1, a1, 3 add a1, a0, a1 - j .LBB5_42 -.LBB5_41: # %if.end124 - # in Loop: Header=BB5_42 Depth=1 + j .LBB5_41 +.LBB5_40: # %if.end124 + # in Loop: Header=BB5_41 Depth=1 addi a1, a1, -8 mv a2, a3 - beq a1, a0, .LBB5_45 -.LBB5_42: # %for.body111 + beq a1, a0, .LBB5_44 +.LBB5_41: # %for.body111 # =>This Inner Loop Header: Depth=1 ld a3, 0(a1) sd zero, 16(a3) - beqz a2, .LBB5_41 -# %bb.43: # %if.then122 - # in Loop: Header=BB5_42 Depth=1 + beqz a2, .LBB5_40 +# %bb.42: # %if.then122 + # in Loop: Header=BB5_41 Depth=1 sd a3, 16(a2) - j .LBB5_41 -.LBB5_44: + j .LBB5_40 +.LBB5_43: li a3, 0 -.LBB5_45: # %VEC_move_t_base_truncate.exit116 +.LBB5_44: # %VEC_move_t_base_truncate.exit116 sd a3, 16(sp) # 8-byte Folded Spill lw a1, 0(a0) addi a1, a1, -1 @@ -3045,8 +3041,8 @@ sw a2, %pcrel_lo(.Lpcrel_hi78)(s4) sw zero, 0(a0) sd a1, 24(sp) # 8-byte Folded Spill - beqz a1, .LBB5_72 -# %bb.46: # %for.body144.preheader + beqz a1, .LBB5_71 +# %bb.45: # %for.body144.preheader slli a7, s5, 1 srli s11, s5, 2 .Lpcrel_hi83: @@ -3076,23 +3072,23 @@ sd a0, 48(sp) # 8-byte Folded Spill ld s0, 24(sp) # 8-byte Folded Reload sd a7, 32(sp) # 8-byte Folded Spill - j .LBB5_48 -.LBB5_47: # %for.inc236 - # in Loop: Header=BB5_48 Depth=1 + j .LBB5_47 +.LBB5_46: # %for.inc236 + # in Loop: Header=BB5_47 Depth=1 ld s0, 16(s0) - beqz s0, .LBB5_72 -.LBB5_48: # %for.body144 + beqz s0, .LBB5_71 +.LBB5_47: # %for.body144 # =>This Loop Header: Depth=1 - # Child Loop BB5_52 Depth 2 - # Child Loop BB5_68 Depth 2 - # Child Loop BB5_71 Depth 2 + # Child Loop BB5_51 Depth 2 + # Child Loop BB5_67 Depth 2 + # Child Loop BB5_70 Depth 2 ld a1, 0(s0) lw a0, 12(a1) ld a2, 8(s0) sd a2, 56(sp) # 8-byte Folded Spill - bltz a0, .LBB5_63 -# %bb.49: # %if.then150 - # in Loop: Header=BB5_48 Depth=1 + bltz a0, .LBB5_62 +# %bb.48: # %if.then150 + # in Loop: Header=BB5_47 Depth=1 lwu a1, 8(a1) li a2, 87 mul a2, a0, a2 @@ -3100,9 +3096,9 @@ add a1, a3, a1 add a1, a1, a2 lbu a1, 0(a1) - beqz a1, .LBB5_63 -# %bb.50: # %for.body160.preheader - # in Loop: Header=BB5_48 Depth=1 + beqz a1, .LBB5_62 +# %bb.49: # %for.body160.preheader + # in Loop: Header=BB5_47 Depth=1 slli s9, a0, 3 ld a2, 72(sp) # 8-byte Folded Reload add s9, a2, s9 @@ -3112,26 +3108,26 @@ slli a1, a1, 2 add a0, a2, a0 add s8, a0, a1 - j .LBB5_52 -.LBB5_51: # %for.inc208 - # in Loop: Header=BB5_52 Depth=2 + j .LBB5_51 +.LBB5_50: # %for.inc208 + # in Loop: Header=BB5_51 Depth=2 addi s7, s7, 4 addi s9, s9, 8 - beq s7, s8, .LBB5_63 -.LBB5_52: # %for.body160 - # Parent Loop BB5_48 Depth=1 + beq s7, s8, .LBB5_62 +.LBB5_51: # %for.body160 + # Parent Loop BB5_47 Depth=1 # => This Inner Loop Header: Depth=2 lw a0, 0(s7) lw a1, %pcrel_lo(.Lpcrel_hi78)(s4) - bne a0, a1, .LBB5_51 -# %bb.53: # %land.lhs.true166 - # in Loop: Header=BB5_52 Depth=2 + bne a0, a1, .LBB5_50 +# %bb.52: # %land.lhs.true166 + # in Loop: Header=BB5_51 Depth=2 ld s1, 0(s9) ld a1, 8(s1) lw a0, 12(a1) - bltz a0, .LBB5_51 -# %bb.54: # %if.then174 - # in Loop: Header=BB5_52 Depth=2 + bltz a0, .LBB5_50 +# %bb.53: # %if.then174 + # in Loop: Header=BB5_51 Depth=2 lw a0, 4(a1) ld a2, 32(a1) li a1, 0 @@ -3175,20 +3171,20 @@ lw a1, 0(s6) sd a0, 24(s3) li a0, 4 - blt a1, a0, .LBB5_57 -# %bb.55: # %if.then174 - # in Loop: Header=BB5_52 Depth=2 + blt a1, a0, .LBB5_56 +# %bb.54: # %if.then174 + # in Loop: Header=BB5_51 Depth=2 ld a0, 104(sp) # 8-byte Folded Reload ld a0, 0(a0) - beqz a0, .LBB5_57 -# %bb.56: # %if.then.i118 - # in Loop: Header=BB5_52 Depth=2 + beqz a0, .LBB5_56 +# %bb.55: # %if.then.i118 + # in Loop: Header=BB5_51 Depth=2 lw a2, 8(s3) lw a3, 8(s10) ld a1, 48(sp) # 8-byte Folded Reload call fprintf -.LBB5_57: # %create_new_reg.exit - # in Loop: Header=BB5_52 Depth=2 +.LBB5_56: # %create_new_reg.exit + # in Loop: Header=BB5_51 Depth=2 lw a0, 0(s2) ld a1, 80(sp) # 8-byte Folded Reload lw a1, 0(a1) @@ -3211,21 +3207,21 @@ sd zero, 40(s3) sb zero, 24(s3) sd s2, 8(s1) - beqz a0, .LBB5_59 -# %bb.58: # %VEC_move_t_base_space.exit.i.i - # in Loop: Header=BB5_52 Depth=2 + beqz a0, .LBB5_58 +# %bb.57: # %VEC_move_t_base_space.exit.i.i + # in Loop: Header=BB5_51 Depth=2 lw a2, 4(a0) lw a1, 0(a0) - bne a2, a1, .LBB5_60 -.LBB5_59: # %if.then.i.i - # in Loop: Header=BB5_52 Depth=2 + bne a2, a1, .LBB5_59 +.LBB5_58: # %if.then.i.i + # in Loop: Header=BB5_51 Depth=2 li a1, 1 call vec_heap_p_reserve ld a1, 112(sp) # 8-byte Folded Reload sd a0, %pcrel_lo(.Lpcrel_hi82)(a1) lw a1, 0(a0) -.LBB5_60: # %VEC_move_t_heap_safe_push.exit - # in Loop: Header=BB5_52 Depth=2 +.LBB5_59: # %VEC_move_t_heap_safe_push.exit + # in Loop: Header=BB5_51 Depth=2 slli a2, a1, 32 addi a1, a1, 1 sw a1, 0(a0) @@ -3240,14 +3236,14 @@ addi a0, a0, 1 sw a0, 0(a1) li a0, 3 - blt a2, a0, .LBB5_51 -# %bb.61: # %VEC_move_t_heap_safe_push.exit - # in Loop: Header=BB5_52 Depth=2 + blt a2, a0, .LBB5_50 +# %bb.60: # %VEC_move_t_heap_safe_push.exit + # in Loop: Header=BB5_51 Depth=2 ld a0, 104(sp) # 8-byte Folded Reload ld a0, 0(a0) - beqz a0, .LBB5_51 -# %bb.62: # %if.then201 - # in Loop: Header=BB5_52 Depth=2 + beqz a0, .LBB5_50 +# %bb.61: # %if.then201 + # in Loop: Header=BB5_51 Depth=2 ld a1, 16(s2) lw a2, 0(s2) lw a3, 8(a1) @@ -3255,15 +3251,15 @@ auipc a1, %pcrel_hi(.L.str.9) addi a1, a1, %pcrel_lo(.Lpcrel_hi89) call fprintf - j .LBB5_51 -.LBB5_63: # %if.end211 - # in Loop: Header=BB5_48 Depth=1 + j .LBB5_50 +.LBB5_62: # %if.end211 + # in Loop: Header=BB5_47 Depth=1 ld a0, 56(sp) # 8-byte Folded Reload lw a1, 12(a0) ld a7, 32(sp) # 8-byte Folded Reload - bltz a1, .LBB5_47 -# %bb.64: # %if.end216 - # in Loop: Header=BB5_48 Depth=1 + bltz a1, .LBB5_46 +# %bb.63: # %if.end216 + # in Loop: Header=BB5_47 Depth=1 lwu a0, 8(a0) li a2, 87 mul a2, a1, a2 @@ -3271,16 +3267,16 @@ add a0, a3, a0 add a0, a0, a2 lbu a2, 0(a0) - beqz a2, .LBB5_47 -# %bb.65: # %for.body226.lr.ph - # in Loop: Header=BB5_48 Depth=1 + beqz a2, .LBB5_46 +# %bb.64: # %for.body226.lr.ph + # in Loop: Header=BB5_47 Depth=1 lw a0, %pcrel_lo(.Lpcrel_hi78)(s4) - bgeu a2, s11, .LBB5_67 -# %bb.66: # in Loop: Header=BB5_48 Depth=1 + bgeu a2, s11, .LBB5_66 +# %bb.65: # in Loop: Header=BB5_47 Depth=1 li a3, 0 - j .LBB5_70 -.LBB5_67: # %vector.ph207 - # in Loop: Header=BB5_48 Depth=1 + j .LBB5_69 +.LBB5_66: # %vector.ph207 + # in Loop: Header=BB5_47 Depth=1 srli a3, s5, 3 li a4, 254 mul a3, a3, a4 @@ -3296,20 +3292,20 @@ ld a6, 72(sp) # 8-byte Folded Reload add a5, a6, a5 mv a6, a3 -.LBB5_68: # %vector.body212 - # Parent Loop BB5_48 Depth=1 +.LBB5_67: # %vector.body212 + # Parent Loop BB5_47 Depth=1 # => This Inner Loop Header: Depth=2 vs2r.v v8, (a5) vs1r.v v10, (a4) add a4, a4, s5 sub a6, a6, s11 add a5, a5, a7 - bnez a6, .LBB5_68 -# %bb.69: # %middle.block204 - # in Loop: Header=BB5_48 Depth=1 - beq a3, a2, .LBB5_47 -.LBB5_70: # %for.body226.preheader - # in Loop: Header=BB5_48 Depth=1 + bnez a6, .LBB5_67 +# %bb.68: # %middle.block204 + # in Loop: Header=BB5_47 Depth=1 + beq a3, a2, .LBB5_46 +.LBB5_69: # %for.body226.preheader + # in Loop: Header=BB5_47 Depth=1 add a4, a3, a1 slli a3, a4, 2 ld a6, 64(sp) # 8-byte Folded Reload @@ -3320,35 +3316,35 @@ add a1, a2, a1 slli a1, a1, 2 add a1, a6, a1 -.LBB5_71: # %for.body226 - # Parent Loop BB5_48 Depth=1 +.LBB5_70: # %for.body226 + # Parent Loop BB5_47 Depth=1 # => This Inner Loop Header: Depth=2 sd s0, 0(a4) sw a0, 0(a3) addi a3, a3, 4 addi a4, a4, 8 - bne a3, a1, .LBB5_71 - j .LBB5_47 -.LBB5_72: # %for.end238 + bne a3, a1, .LBB5_70 + j .LBB5_46 +.LBB5_71: # %for.end238 ld a0, 112(sp) # 8-byte Folded Reload ld a0, %pcrel_lo(.Lpcrel_hi82)(a0) ld a3, 16(sp) # 8-byte Folded Reload - beqz a0, .LBB5_76 -# %bb.73: # %VEC_move_t_base_length.exit123 + beqz a0, .LBB5_75 +# %bb.72: # %VEC_move_t_base_length.exit123 lw a1, 0(a0) - blez a1, .LBB5_76 -# %bb.74: # %for.body250.lr.ph + blez a1, .LBB5_75 +# %bb.73: # %for.body250.lr.ph slli a1, a1, 3 add a1, a0, a1 -.LBB5_75: # %for.body250 +.LBB5_74: # %for.body250 # =>This Inner Loop Header: Depth=1 ld a2, 0(a1) sd zero, 16(a2) addi a1, a1, -8 sd a2, 16(a3) mv a3, a2 - bne a1, a0, .LBB5_75 -.LBB5_76: # %cleanup + bne a1, a0, .LBB5_74 +.LBB5_75: # %cleanup ld a0, 24(sp) # 8-byte Folded Reload csrr a1, vlenb slli a1, a1, 1 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/pointcache.s 2024-04-01 12:40:58.842464542 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/pointcache.s 2024-04-01 12:41:10.838130006 +0000 @@ -729,33 +729,32 @@ mul a5, a5, a7 vsetvli a7, zero, e64, m4, ta, ma vid.v v8 - vmul.vx v8, v8, a6 + vmul.vx v12, v8, a6 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v12, 0 + vmv.v.i v8, 0 li a6, 140 li a7, 148 mv t0, a3 - vmv.v.i v14, 0 .LBB11_7: # %vector.body # =>This Inner Loop Header: Depth=1 vsetvli zero, zero, e64, m4, ta, ma - vadd.vx v16, v8, a4 + vadd.vx v16, v12, a4 vsetvli zero, zero, e32, m2, ta, ma - vluxei64.v v20, (a6), v16 - vfsub.vf v20, v20, fa4 - vmfle.vf v0, v20, fa5 - vluxei64.v v20, (a7), v16, v0.t - vfadd.vf v16, v20, fa4 - vmfge.vf v18, v16, fa5 - vmand.mm v0, v0, v18 - vmerge.vim v16, v12, 1, v0 - vadd.vv v14, v14, v16 + vluxei64.v v10, (a6), v16 + vfsub.vf v10, v10, fa4 + vmfle.vf v0, v10, fa5 + vluxei64.v v10, (a7), v16, v0.t + vfadd.vf v10, v10, fa4 + vmfge.vf v16, v10, fa5 + vmand.mm v0, v0, v16 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub t0, t0, a0 add a4, a4, a5 bnez t0, .LBB11_7 # %bb.8: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v14, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a0, v8 bne a3, a2, .LBB11_10 .LBB11_9: # %cleanup --- build.a/External/SPEC/CFP2017rate/511.povray_r/CMakeFiles/511.povray_r.dir/root/cpu2017/benchspec/CPU/511.povray_r/src/lathe.s 2024-04-01 12:40:58.578471904 +0000 +++ build.b/External/SPEC/CFP2017rate/511.povray_r/CMakeFiles/511.povray_r.dir/root/cpu2017/benchspec/CPU/511.povray_r/src/lathe.s 2024-04-01 12:41:10.570137480 +0000 @@ -1968,16 +1968,15 @@ fld fs4, %pcrel_lo(.Lpcrel_hi35)(a0) addi s10, s4, 1 vsetvli a0, zero, e32, m1, ta, ma - vmv.v.i v20, 0 + vmv.v.i v8, 0 + addi a0, sp, 96 + vs1r.v v8, (a0) # Unknown-size Folded Spill vsetvli zero, zero, e64, m2, ta, ma - vid.v v22 + vid.v v20 csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 96 - vs1r.v v20, (a0) # Unknown-size Folded Spill - addi a0, sp, 96 - vs2r.v v22, (a0) # Unknown-size Folded Spill + vs2r.v v20, (a0) # Unknown-size Folded Spill j .LBB4_7 .LBB4_6: # %for.inc # in Loop: Header=BB4_7 Depth=1 @@ -2035,12 +2034,9 @@ slli a1, a0, 32 srli a1, a1, 32 csrr a2, vlenb - slli a2, a2, 1 add a2, sp, a2 addi a2, a2, 96 - vl1r.v v20, (a2) # Unknown-size Folded Reload - addi a2, sp, 96 - vl2r.v v22, (a2) # Unknown-size Folded Reload + vl2r.v v20, (a2) # Unknown-size Folded Reload bgeu a1, s8, .LBB4_12 # %bb.11: # in Loop: Header=BB4_7 Depth=1 mv a2, a0 @@ -2055,7 +2051,8 @@ and a3, a3, a1 sub a2, a0, a3 vsetvli a7, zero, e32, m1, tu, ma - vmv1r.v v8, v20 + addi a7, sp, 96 + vl1r.v v8, (a7) # Unknown-size Folded Reload vmv.s.x v8, s11 slli a0, a0, 3 add a0, s7, a0 @@ -2066,7 +2063,7 @@ vl2re64.v v10, (a0) addi t0, s8, -1 vsetvli zero, zero, e64, m2, ta, ma - vrsub.vx v12, v22, t0 + vrsub.vx v12, v20, t0 vrgather.vv v14, v10, v12 vmfge.vf v9, v14, fs0 vmfle.vf v10, v14, fs4 @@ -2081,9 +2078,8 @@ vfsub.vf v10, v18, fs1 vmfge.vf v9, v10, fs0 vmand.mm v0, v0, v9 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v9, v20, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a7, a7, s8 add a0, a0, s6 bnez a7, .LBB4_13 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/modifiers/intern/MOD_array.s 2024-04-01 12:40:59.062458406 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/modifiers/intern/MOD_array.s 2024-04-01 12:41:11.070123537 +0000 @@ -1232,38 +1232,36 @@ mul a0, a0, a2 and a0, a0, a6 vsetvli a2, zero, e64, m4, ta, ma - vid.v v8 + vid.v v12 slli a2, a3, 1 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v12, 0 - vmv.v.i v16, -1 + vmv.v.i v8, 0 + vmv.v.i v10, -1 mv a3, a0 mv a4, s10 - vmv.v.i v14, 0 .LBB1_136: # %vector.body56 # =>This Inner Loop Header: Depth=1 - vl2re32.v v18, (a4) + vl2re32.v v16, (a4) vsetvli zero, zero, e32, m2, ta, ma - vmsne.vi v20, v18, -1 + vmsne.vi v18, v16, -1 vsetvli zero, zero, e64, m4, ta, ma - vzext.vf2 v24, v18 - vmseq.vv v18, v8, v24 - vmand.mm v0, v20, v18 - vmsne.vv v18, v8, v24 - vse32.v v16, (a4), v0.t - vmand.mm v0, v20, v18 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v18, v12, 1, v0 - vadd.vv v14, v14, v18 + vzext.vf2 v20, v16 + vmseq.vv v16, v12, v20 + vmand.mm v0, v18, v16 + vmsne.vv v16, v12, v20 + vse32.v v10, (a4), v0.t + vmand.mm v0, v18, v16 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma - vadd.vx v8, v8, a1 + vadd.vx v12, v12, a1 sub a3, a3, a1 add a4, a4, a2 bnez a3, .LBB1_136 # %bb.137: # %middle.block48 - vmv.s.x v8, zero + vmv.s.x v10, zero vsetvli zero, zero, e32, m2, ta, ma - vredsum.vs v8, v14, v8 + vredsum.vs v8, v8, v10 vmv.x.s a2, v8 bne a0, a6, .LBB1_144 .LBB1_138: # %for.end409.i --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/modulo-sched.s 2024-04-01 12:41:00.450419698 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/modulo-sched.s 2024-04-01 12:41:12.510083380 +0000 @@ -907,13 +907,7 @@ lui a1, 524288 addiw a1, a1, -1 sd a1, 88(sp) # 8-byte Folded Spill - vsetivli zero, 2, e32, mf2, ta, ma - vmv.v.i v8, 0 - csrr a1, vlenb - add a1, sp, a1 - addi a1, a1, 432 - vs1r.v v8, (a1) # Unknown-size Folded Spill - vsetvli zero, zero, e64, m1, ta, ma + vsetivli zero, 2, e64, m1, ta, ma vmv.v.i v8, 0 addi a1, sp, 432 vs1r.v v8, (a1) # Unknown-size Folded Spill @@ -3350,13 +3344,8 @@ vmv.v.x v8, s0 vmsle.vv v0, v8, v10 ld a0, 8(s8) - csrr a1, vlenb - add a1, sp, a1 - addi a1, a1, 432 - vl1r.v v8, (a1) # Unknown-size Folded Reload - vmerge.vim v8, v8, 1, v0 - vadd.vv v8, v11, v8 - vse32.v v8, (s10) + vadd.vi v11, v11, 1, v0.t + vse32.v v11, (s10) call free lw a0, 16(s8) sd s4, 8(s8) --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/animation/keyframes_general.s 2024-04-01 12:40:58.914462533 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/animation/keyframes_general.s 2024-04-01 12:41:10.914127887 +0000 @@ -618,40 +618,38 @@ mul a6, a6, t0 vsetvli t0, zero, e64, m4, ta, ma vid.v v8 - vmul.vx v8, v8, a7 + vmul.vx v12, v8, a7 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v12, 0 + vmv.v.i v8, 0 li a7, 52 li t0, 51 li t1, 53 mv t2, a4 - vmv.v.i v14, 0 .LBB5_6: # %vector.body # =>This Inner Loop Header: Depth=1 vsetvli zero, zero, e64, m4, ta, ma - vadd.vx v16, v8, a2 + vadd.vx v16, v12, a2 vsetvli zero, zero, e8, mf2, ta, ma - vluxei64.v v20, (a7), v16 - vand.vi v20, v20, 1 - vmseq.vi v0, v20, 0 - vluxei64.v v21, (t0), v16, v0.t - vand.vi v21, v21, 1 - vor.vv v20, v20, v21 - vmseq.vi v0, v20, 0 - vluxei64.v v21, (t1), v16, v0.t - vmsne.vi v16, v20, 0 - vand.vi v17, v21, 1 - vmsne.vi v17, v17, 0 - vmor.mm v0, v16, v17 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v16, v12, 1, v0 - vadd.vv v14, v14, v16 + vluxei64.v v10, (a7), v16 + vand.vi v10, v10, 1 + vmseq.vi v0, v10, 0 + vluxei64.v v11, (t0), v16, v0.t + vand.vi v11, v11, 1 + vor.vv v10, v10, v11 + vmseq.vi v0, v10, 0 + vluxei64.v v11, (t1), v16, v0.t + vmsne.vi v10, v10, 0 + vand.vi v11, v11, 1 + vmsne.vi v11, v11, 0 + vmor.mm v0, v10, v11 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub t2, t2, a5 add a2, a2, a6 bnez t2, .LBB5_6 # %bb.7: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v14, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s0, v8 bne a4, a1, .LBB5_16 .LBB5_8: # %for.end --- build.a/MultiSource/Benchmarks/FreeBench/neural/CMakeFiles/neural.dir/neural.s 2024-04-01 12:41:02.830353326 +0000 +++ build.b/MultiSource/Benchmarks/FreeBench/neural/CMakeFiles/neural.dir/neural.s 2024-04-01 12:41:14.942015560 +0000 @@ -65,14 +65,14 @@ addi a2, a2, %pcrel_lo(.Lpcrel_hi4) call fprintf li a0, 2 - bne s0, a0, .LBB0_116 + bne s0, a0, .LBB0_115 # %bb.1: # %if.end ld a0, 8(s1) .Lpcrel_hi7: auipc a1, %pcrel_hi(.L.str.6) addi a1, a1, %pcrel_lo(.Lpcrel_hi7) call fopen - beqz a0, .LBB0_117 + beqz a0, .LBB0_116 # %bb.2: # %if.end10 mv s0, a0 addi a0, sp, 68 @@ -134,9 +134,9 @@ auipc a1, %pcrel_hi(stored) sd a1, 32(sp) # 8-byte Folded Spill sd a0, %pcrel_lo(.Lpcrel_hi15)(a1) - beqz s2, .LBB0_118 + beqz s2, .LBB0_117 # %bb.3: # %if.end10 - beqz a0, .LBB0_118 + beqz a0, .LBB0_117 # %bb.4: # %if.end31 ld a0, 48(sp) # 8-byte Folded Reload lw s1, %pcrel_lo(.Lpcrel_hi12)(a0) @@ -146,7 +146,7 @@ .Lpcrel_hi17: auipc s11, %pcrel_hi(Tmatrix) sd a0, %pcrel_lo(.Lpcrel_hi17)(s11) - beqz a0, .LBB0_119 + beqz a0, .LBB0_118 # %bb.5: # %for.cond.preheader slli s2, s1, 2 blez s1, .LBB0_9 @@ -162,7 +162,7 @@ ld s4, %pcrel_lo(.Lpcrel_hi17)(s11) add a0, s4, s1 ld a0, 0(a0) - beqz a0, .LBB0_114 + beqz a0, .LBB0_113 # %bb.8: # %for.cond # in Loop: Header=BB0_7 Depth=1 addi s1, s1, 8 @@ -187,11 +187,11 @@ .Lpcrel_hi22: auipc s6, %pcrel_hi(generators) sd a0, %pcrel_lo(.Lpcrel_hi22)(s6) - beqz s4, .LBB0_120 + beqz s4, .LBB0_119 # %bb.10: # %for.end - beqz s1, .LBB0_120 + beqz s1, .LBB0_119 # %bb.11: # %for.end - beqz a0, .LBB0_120 + beqz a0, .LBB0_119 # %bb.12: # %for.cond68.preheader blez s7, .LBB0_18 # %bb.13: # %for.body71.lr.ph @@ -215,19 +215,19 @@ ld s4, %pcrel_lo(.Lpcrel_hi20)(s9) add a0, s4, s1 ld a0, 0(a0) - beqz a0, .LBB0_113 + beqz a0, .LBB0_112 # %bb.15: # %lor.lhs.false90 # in Loop: Header=BB0_14 Depth=1 ld a0, %pcrel_lo(.Lpcrel_hi21)(s10) add a0, a0, s1 ld a0, 0(a0) - beqz a0, .LBB0_113 + beqz a0, .LBB0_112 # %bb.16: # %lor.lhs.false94 # in Loop: Header=BB0_14 Depth=1 ld a0, %pcrel_lo(.Lpcrel_hi22)(s6) add a0, a0, s1 ld a0, 0(a0) - beqz a0, .LBB0_113 + beqz a0, .LBB0_112 # %bb.17: # %for.cond68 # in Loop: Header=BB0_14 Depth=1 addi s1, s1, 8 @@ -236,7 +236,7 @@ lw a0, %pcrel_lo(.Lpcrel_hi9)(s8) addiw a0, a0, 2 call malloc - beqz a0, .LBB0_121 + beqz a0, .LBB0_120 # %bb.19: # %for.cond.preheader.i blez s7, .LBB0_28 # %bb.20: # %for.body.i.preheader @@ -406,9 +406,9 @@ # => This Inner Loop Header: Depth=3 vl2re32.v v12, (s0) vl2re32.v v14, (t6) + vsetvli zero, zero, e32, m2, ta, mu vmsne.vv v0, v12, v14 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vadd.vi v10, v10, 1, v0.t add s0, s0, a4 sub t4, t4, a5 add t6, t6, a4 @@ -460,10 +460,10 @@ # => This Inner Loop Header: Depth=3 vl2re32.v v12, (s0) vl2re32.v v14, (s1) + vsetvli zero, zero, e32, m2, ta, mu vrsub.vi v12, v12, 0 vmsne.vv v0, v14, v12 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vadd.vi v10, v10, 1, v0.t add s1, s1, a4 sub t6, t6, a5 add s0, s0, a4 @@ -652,7 +652,7 @@ lw a0, %pcrel_lo(.Lpcrel_hi12)(a0) slli a0, a0, 2 call malloc - beqz a0, .LBB0_122 + beqz a0, .LBB0_121 # %bb.70: # %do.body.preheader.i mv s0, a0 ld a0, 56(sp) # 8-byte Folded Reload @@ -854,7 +854,7 @@ ld a0, 56(sp) # 8-byte Folded Reload lw a0, %pcrel_lo(.Lpcrel_hi11)(a0) ld s7, 32(sp) # 8-byte Folded Reload - blez a0, .LBB0_112 + blez a0, .LBB0_111 # %bb.93: # %for.body.i51.preheader li s0, 0 li s1, 0 @@ -885,7 +885,7 @@ call run j .LBB0_95 .LBB0_98: # %for.cond19.preheader.i - blez a0, .LBB0_112 + blez a0, .LBB0_111 # %bb.99: # %for.body22.i.preheader li s3, 0 slli s4, s2, 1 @@ -897,40 +897,36 @@ ld a1, 40(sp) # 8-byte Folded Reload mul s2, a0, a1 vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v14, 0 + vmv.v.i v8, 0 + addi a0, sp, 176 + vs2r.v v8, (a0) # Unknown-size Folded Spill li s6, 1 .Lpcrel_hi36: auipc a0, %pcrel_hi(.L.str.20) addi s1, a0, %pcrel_lo(.Lpcrel_hi36) - addi a0, sp, 176 - vs2r.v v14, (a0) # Unknown-size Folded Spill - j .LBB0_102 + j .LBB0_101 .LBB0_100: # %if.then30.i - # in Loop: Header=BB0_102 Depth=1 + # in Loop: Header=BB0_101 Depth=1 ld a2, %pcrel_lo(.Lpcrel_hi15)(s7) add a0, a2, a0 sw s6, 0(a0) mv a0, s1 call printf -.LBB0_101: # %for.inc39.i - # in Loop: Header=BB0_102 Depth=1 ld a0, 56(sp) # 8-byte Folded Reload lw a0, %pcrel_lo(.Lpcrel_hi11)(a0) addi s3, s3, 1 - addi a1, sp, 176 - vl2r.v v14, (a1) # Unknown-size Folded Reload - bge s3, a0, .LBB0_112 -.LBB0_102: # %for.body22.i + bge s3, a0, .LBB0_111 +.LBB0_101: # %for.body22.i # =>This Loop Header: Depth=1 - # Child Loop BB0_106 Depth 2 - # Child Loop BB0_109 Depth 2 + # Child Loop BB0_105 Depth 2 + # Child Loop BB0_108 Depth 2 ld a0, 48(sp) # 8-byte Folded Reload lw a3, %pcrel_lo(.Lpcrel_hi12)(a0) slli a0, s3, 2 sext.w a1, s3 blez a3, .LBB0_100 -# %bb.103: # %for.body.preheader.i.i - # in Loop: Header=BB0_102 Depth=1 +# %bb.102: # %for.body.preheader.i.i + # in Loop: Header=BB0_101 Depth=1 ld a2, %pcrel_lo(.Lpcrel_hi20)(s9) ld a5, %pcrel_lo(.Lpcrel_hi21)(s10) slli a6, s3, 3 @@ -938,46 +934,46 @@ ld a4, 0(a2) add a5, a5, a6 ld a5, 0(a5) - bgeu a3, s5, .LBB0_105 -# %bb.104: # in Loop: Header=BB0_102 Depth=1 + bgeu a3, s5, .LBB0_104 +# %bb.103: # in Loop: Header=BB0_101 Depth=1 li a6, 0 li a2, 0 - j .LBB0_108 -.LBB0_105: # %vector.ph139 - # in Loop: Header=BB0_102 Depth=1 + j .LBB0_107 +.LBB0_104: # %vector.ph139 + # in Loop: Header=BB0_101 Depth=1 and a6, s2, a3 - vsetvli a2, zero, e32, m2, ta, ma + vsetvli a2, zero, e32, m2, ta, mu mv a2, a6 mv a7, a5 mv t0, a4 - vmv2r.v v8, v14 -.LBB0_106: # %vector.body144 - # Parent Loop BB0_102 Depth=1 + addi t1, sp, 176 + vl2r.v v8, (t1) # Unknown-size Folded Reload +.LBB0_105: # %vector.body144 + # Parent Loop BB0_101 Depth=1 # => This Inner Loop Header: Depth=2 vl2re32.v v10, (t0) vl2re32.v v12, (a7) vmsne.vv v0, v10, v12 - vmerge.vim v10, v14, 1, v0 - vadd.vv v8, v8, v10 + vadd.vi v8, v8, 1, v0.t add t0, t0, s4 sub a2, a2, s5 add a7, a7, s4 - bnez a2, .LBB0_106 -# %bb.107: # %middle.block136 - # in Loop: Header=BB0_102 Depth=1 + bnez a2, .LBB0_105 +# %bb.106: # %middle.block136 + # in Loop: Header=BB0_101 Depth=1 vmv.s.x v10, zero vredsum.vs v8, v8, v10 vmv.x.s a2, v8 - beq a6, a3, .LBB0_110 -.LBB0_108: # %for.body.i.i.preheader - # in Loop: Header=BB0_102 Depth=1 + beq a6, a3, .LBB0_109 +.LBB0_107: # %for.body.i.i.preheader + # in Loop: Header=BB0_101 Depth=1 slli a7, a6, 2 add a6, a5, a7 add a4, a4, a7 slli a3, a3, 2 add a5, a5, a3 -.LBB0_109: # %for.body.i.i - # Parent Loop BB0_102 Depth=1 +.LBB0_108: # %for.body.i.i + # Parent Loop BB0_101 Depth=1 # => This Inner Loop Header: Depth=2 lw a3, 0(a4) lw a7, 0(a6) @@ -986,19 +982,22 @@ addw a2, a2, a3 addi a6, a6, 4 addi a4, a4, 4 - bne a6, a5, .LBB0_109 -.LBB0_110: # %hamming.exit.i - # in Loop: Header=BB0_102 Depth=1 + bne a6, a5, .LBB0_108 +.LBB0_109: # %hamming.exit.i + # in Loop: Header=BB0_101 Depth=1 beqz a2, .LBB0_100 -# %bb.111: # %if.else34.i - # in Loop: Header=BB0_102 Depth=1 +# %bb.110: # %if.else34.i + # in Loop: Header=BB0_101 Depth=1 ld a3, %pcrel_lo(.Lpcrel_hi15)(s7) add a0, a3, a0 sw zero, 0(a0) mv a0, s0 call printf - j .LBB0_101 -.LBB0_112: # %storecheck.exit + ld a0, 56(sp) # 8-byte Folded Reload + lw a0, %pcrel_lo(.Lpcrel_hi11)(a0) + addi s3, s3, 1 + blt s3, a0, .LBB0_101 +.LBB0_111: # %storecheck.exit li a0, 0 csrr a1, vlenb slli a1, a1, 1 @@ -1020,25 +1019,25 @@ fld fs1, 184(sp) # 8-byte Folded Reload addi sp, sp, 304 ret -.LBB0_113: # %if.then98 +.LBB0_112: # %if.then98 ld a0, 24(sp) # 8-byte Folded Reload ld a3, 0(a0) .Lpcrel_hi24: auipc a0, %pcrel_hi(.L.str.9) addi a0, a0, %pcrel_lo(.Lpcrel_hi24) - j .LBB0_115 -.LBB0_114: # %if.then48 + j .LBB0_114 +.LBB0_113: # %if.then48 ld a3, 0(s6) .Lpcrel_hi19: auipc a0, %pcrel_hi(.L.str.9) addi a0, a0, %pcrel_lo(.Lpcrel_hi19) -.LBB0_115: # %if.then29 +.LBB0_114: # %if.then29 li a1, 21 li a2, 1 call fwrite li a0, 1 call exit -.LBB0_116: # %if.then +.LBB0_115: # %if.then ld a0, 0(s6) addiw a2, s0, -1 .Lpcrel_hi5: @@ -1053,7 +1052,7 @@ call fprintf li a0, 1 call exit -.LBB0_117: # %if.then7 +.LBB0_116: # %if.then7 ld a0, 0(s6) ld a2, 8(s1) .Lpcrel_hi8: @@ -1062,39 +1061,39 @@ call fprintf li a0, 1 call exit -.LBB0_118: # %if.then29 +.LBB0_117: # %if.then29 ld a3, 0(s6) .Lpcrel_hi16: auipc a0, %pcrel_hi(.L.str.9) addi a0, a0, %pcrel_lo(.Lpcrel_hi16) - j .LBB0_115 -.LBB0_119: # %if.then36 + j .LBB0_114 +.LBB0_118: # %if.then36 ld a3, 0(s6) .Lpcrel_hi18: auipc a0, %pcrel_hi(.L.str.9) addi a0, a0, %pcrel_lo(.Lpcrel_hi18) - j .LBB0_115 -.LBB0_120: # %if.then65 + j .LBB0_114 +.LBB0_119: # %if.then65 ld a0, 24(sp) # 8-byte Folded Reload ld a3, 0(a0) .Lpcrel_hi23: auipc a0, %pcrel_hi(.L.str.9) addi a0, a0, %pcrel_lo(.Lpcrel_hi23) - j .LBB0_115 -.LBB0_121: # %if.then.i + j .LBB0_114 +.LBB0_120: # %if.then.i ld a0, 24(sp) # 8-byte Folded Reload ld a3, 0(a0) .Lpcrel_hi26: auipc a0, %pcrel_hi(.L.str.9) addi a0, a0, %pcrel_lo(.Lpcrel_hi26) - j .LBB0_115 -.LBB0_122: # %if.then.i50 + j .LBB0_114 +.LBB0_121: # %if.then.i50 ld a0, 24(sp) # 8-byte Folded Reload ld a3, 0(a0) .Lpcrel_hi33: auipc a0, %pcrel_hi(.L.str.9) addi a0, a0, %pcrel_lo(.Lpcrel_hi33) - j .LBB0_115 + j .LBB0_114 .Lfunc_end0: .size main, .Lfunc_end0-main .cfi_endproc @@ -1220,6 +1219,7 @@ # Parent Loop BB1_4 Depth=1 # => This Inner Loop Header: Depth=2 vl2re32.v v10, (t2) + vsetvli zero, zero, e32, m2, ta, ma vmseq.vi v0, v10, 0 vle32.v v10, (t1), v0.t vse32.v v10, (t2), v0.t @@ -1272,9 +1272,9 @@ # => This Inner Loop Header: Depth=2 vl2re32.v v12, (t2) vl2re32.v v14, (t1) + vsetvli zero, zero, e32, m2, ta, mu vmsne.vv v0, v12, v14 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vadd.vi v10, v10, 1, v0.t add t2, t2, a3 sub a7, a7, a2 add t1, t1, a3 @@ -1504,7 +1504,10 @@ sd a1, 56(sp) # 8-byte Folded Spill vsetvli a1, zero, e32, m2, ta, ma vmv.s.x v8, zero - addi a1, sp, 64 + csrr a1, vlenb + slli a1, a1, 1 + add a1, sp, a1 + addi a1, a1, 64 vs2r.v v8, (a1) # Unknown-size Folded Spill .Lpcrel_hi46: auipc s9, %pcrel_hi(.LCPI2_0) @@ -1515,18 +1518,15 @@ .Lpcrel_hi48: auipc a1, %pcrel_hi(.LCPI2_2) fld fs4, %pcrel_lo(.Lpcrel_hi48)(a1) - vmv.v.i v14, 0 - vmv.v.i v16, -1 + vmv.v.i v8, 0 + addi a1, sp, 64 + vs2r.v v8, (a1) # Unknown-size Folded Spill + vmv.v.i v14, -1 csrr a1, vlenb slli a1, a1, 2 add a1, sp, a1 addi a1, a1, 64 vs2r.v v14, (a1) # Unknown-size Folded Spill - csrr a1, vlenb - slli a1, a1, 1 - add a1, sp, a1 - addi a1, a1, 64 - vs2r.v v16, (a1) # Unknown-size Folded Spill sd s1, 48(sp) # 8-byte Folded Spill sd a0, 24(sp) # 8-byte Folded Spill j .LBB2_7 @@ -1578,11 +1578,6 @@ add a0, sp, a0 addi a0, a0, 64 vl2r.v v14, (a0) # Unknown-size Folded Reload - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 64 - vl2r.v v16, (a0) # Unknown-size Folded Reload bge s0, s3, .LBB2_22 .LBB2_10: # %for.cond19.preheader # Parent Loop BB2_7 Depth=1 @@ -1610,7 +1605,10 @@ mv a2, a1 mv a3, s1 mv a4, a0 - addi a5, sp, 64 + csrr a5, vlenb + slli a5, a5, 1 + add a5, sp, a5 + addi a5, a5, 64 vl2r.v v8, (a5) # Unknown-size Folded Reload .LBB2_14: # %vector.body55 # Parent Loop BB2_7 Depth=1 @@ -1731,7 +1729,10 @@ mv a2, a1 mv a3, s8 mv a4, a0 - addi a5, sp, 64 + csrr a5, vlenb + slli a5, a5, 1 + add a5, sp, a5 + addi a5, a5, 64 vl2r.v v8, (a5) # Unknown-size Folded Reload .LBB2_33: # %vector.body40 # Parent Loop BB2_7 Depth=1 @@ -1805,11 +1806,6 @@ add a1, sp, a1 addi a1, a1, 64 vl2r.v v14, (a1) # Unknown-size Folded Reload - csrr a1, vlenb - slli a1, a1, 1 - add a1, sp, a1 - addi a1, a1, 64 - vl2r.v v16, (a1) # Unknown-size Folded Reload beqz a0, .LBB2_27 # %bb.42: # %if.then92 # in Loop: Header=BB2_28 Depth=3 @@ -1852,7 +1848,7 @@ # => This Inner Loop Header: Depth=2 vl2re32.v v8, (a4) vmfgt.vf v0, v8, fs1 - vmerge.vim v8, v16, 1, v0 + vmerge.vim v8, v14, 1, v0 vs2r.v v8, (a3) add a4, a4, s5 sub a2, a2, s6 @@ -1892,19 +1888,19 @@ # in Loop: Header=BB2_7 Depth=1 ld a1, 56(sp) # 8-byte Folded Reload and a1, a1, s3 - vsetvli a2, zero, e32, m2, ta, ma + vsetvli a2, zero, e32, m2, ta, mu mv a2, a1 mv a3, a0 mv a4, a6 - vmv2r.v v8, v14 + addi a7, sp, 64 + vl2r.v v8, (a7) # Unknown-size Folded Reload .LBB2_58: # %vector.body15 # Parent Loop BB2_7 Depth=1 # => This Inner Loop Header: Depth=2 vl2re32.v v10, (a4) vl2re32.v v12, (a3) vmsne.vv v0, v10, v12 - vmerge.vim v10, v14, 1, v0 - vadd.vv v8, v8, v10 + vadd.vi v8, v8, 1, v0.t add a4, a4, s5 sub a2, a2, s6 add a3, a3, s5 --- build.a/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/dofs/dof_tools.s 2024-04-01 12:40:58.086485624 +0000 +++ build.b/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/dofs/dof_tools.s 2024-04-01 12:41:10.126149862 +0000 @@ -157500,24 +157500,23 @@ and a4, a3, a4 slli a2, a4, 2 add a2, a0, a2 + slli a6, a6, 1 vsetvli a7, zero, e32, m2, ta, ma vmv.v.i v8, 0 - slli a6, a6, 1 mv a7, a4 mv t0, a0 - vmv.v.i v10, 0 .LBB140_9: # %vector.body # =>This Inner Loop Header: Depth=1 - vl2re32.v v12, (t0) - vmseq.vx v0, v12, s0 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vl2re32.v v10, (t0) + vsetvli zero, zero, e32, m2, ta, mu + vmseq.vx v0, v10, s0 + vadd.vi v8, v8, 1, v0.t sub a7, a7, a5 add t0, t0, a6 bnez a7, .LBB140_9 # %bb.10: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s1, v8 beq a3, a4, .LBB140_12 .LBB140_11: # %for.body.i.i @@ -157988,24 +157987,23 @@ and a4, a3, a4 slli a2, a4, 2 add a2, a0, a2 + slli a6, a6, 1 vsetvli a7, zero, e32, m2, ta, ma vmv.v.i v8, 0 - slli a6, a6, 1 mv a7, a4 mv t0, a0 - vmv.v.i v10, 0 .LBB142_9: # %vector.body # =>This Inner Loop Header: Depth=1 - vl2re32.v v12, (t0) - vmseq.vx v0, v12, s0 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vl2re32.v v10, (t0) + vsetvli zero, zero, e32, m2, ta, mu + vmseq.vx v0, v10, s0 + vadd.vi v8, v8, 1, v0.t sub a7, a7, a5 add t0, t0, a6 bnez a7, .LBB142_9 # %bb.10: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s1, v8 beq a3, a4, .LBB142_12 .LBB142_11: # %for.body.i.i @@ -158406,24 +158404,23 @@ and a4, a3, a4 slli a2, a4, 2 add a2, a0, a2 + slli a6, a6, 1 vsetvli a7, zero, e32, m2, ta, ma vmv.v.i v8, 0 - slli a6, a6, 1 mv a7, a4 mv t0, a0 - vmv.v.i v10, 0 .LBB144_9: # %vector.body # =>This Inner Loop Header: Depth=1 - vl2re32.v v12, (t0) - vmseq.vx v0, v12, s0 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vl2re32.v v10, (t0) + vsetvli zero, zero, e32, m2, ta, mu + vmseq.vx v0, v10, s0 + vadd.vi v8, v8, 1, v0.t sub a7, a7, a5 add t0, t0, a6 bnez a7, .LBB144_9 # %bb.10: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s1, v8 beq a3, a4, .LBB144_12 .LBB144_11: # %for.body.i.i --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/loop-invariant.s 2024-04-01 12:40:59.578444016 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/loop-invariant.s 2024-04-01 12:41:11.618108255 +0000 @@ -35,9 +35,10 @@ .cfi_offset s10, -96 .cfi_offset s11, -104 csrr a0, vlenb - slli a0, a0, 3 + li a1, 6 + mul a0, a0, a1 sub sp, sp, a0 - .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x05, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 720 + 8 * vlenb + .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x05, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 720 + 6 * vlenb .Lpcrel_hi0: auipc a0, %got_pcrel_hi(flag_ira_loop_pressure) ld a0, %pcrel_lo(.Lpcrel_hi0)(a0) @@ -161,15 +162,13 @@ li a0, 1 li a1, 184 csrr a2, vlenb - slli a3, a2, 2 - add a2, a3, a2 + slli a2, a2, 2 add a2, sp, a2 addi a2, a2, 608 vs1r.v v8, (a2) # Unknown-size Folded Spill call xcalloc csrr a1, vlenb - slli a2, a1, 2 - add a1, a2, a1 + slli a1, a1, 2 add a1, sp, a1 addi a1, a1, 608 vl1r.v v8, (a1) # Unknown-size Folded Reload @@ -273,8 +272,7 @@ vsetvli a1, zero, e32, m2, ta, ma vmv.v.i v8, 0 csrr a1, vlenb - slli a2, a1, 2 - add a1, a2, a1 + slli a1, a1, 2 add a1, sp, a1 addi a1, a1, 608 vs2r.v v8, (a1) # Unknown-size Folded Spill @@ -373,8 +371,7 @@ ld a0, 200(sp) # 8-byte Folded Reload lw a0, 0(a0) csrr a1, vlenb - slli a2, a1, 2 - add a1, a2, a1 + slli a1, a1, 2 add a1, sp, a1 addi a1, a1, 608 vl2r.v v16, (a1) # Unknown-size Folded Reload @@ -1885,15 +1882,14 @@ vsetvli zero, zero, e32, m1, ta, ma vmv.v.i v8, 0 vsetvli zero, zero, e32, m1, tu, ma - vmv.v.i v9, 0 - vmv.s.x v9, a2 + vmv.s.x v8, a2 addi a1, sp, 608 - vs1r.v v9, (a1) # Unknown-size Folded Spill + vs1r.v v8, (a1) # Unknown-size Folded Spill .Lpcrel_hi75: auipc a1, %got_pcrel_hi(ira_available_class_regs) ld a7, %pcrel_lo(.Lpcrel_hi75)(a1) vsetvli a1, zero, e32, m2, ta, ma - vmv.v.i v10, 0 + vmv.v.i v8, 0 sd s9, 200(sp) # 8-byte Folded Spill .LBB0_233: # %for.body # =>This Loop Header: Depth=1 @@ -1943,16 +1939,10 @@ sd s6, 56(sp) # 8-byte Folded Spill sd a0, 48(sp) # 8-byte Folded Spill csrr a0, vlenb - slli a1, a0, 2 - add a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 608 - vs2r.v v10, (a0) # Unknown-size Folded Spill - csrr a0, vlenb slli a0, a0, 2 add a0, sp, a0 addi a0, a0, 608 - vs1r.v v8, (a0) # Unknown-size Folded Spill + vs2r.v v8, (a0) # Unknown-size Folded Spill ld a0, 136(sp) # 8-byte Folded Reload sw s4, %pcrel_lo(.Lpcrel_hi73)(a0) li a1, 100 @@ -3381,8 +3371,7 @@ and a2, a2, a0 mv a4, a2 csrr a5, vlenb - slli t0, a5, 2 - add a5, t0, a5 + slli a5, a5, 2 add a5, sp, a5 addi a5, a5, 608 vl2r.v v16, (a5) # Unknown-size Folded Reload @@ -3442,11 +3431,6 @@ vl2r.v v10, (a7) # Unknown-size Folded Reload addi a7, sp, 608 vl1r.v v8, (a7) # Unknown-size Folded Reload - csrr a7, vlenb - slli a7, a7, 2 - add a7, sp, a7 - addi a7, a7, 608 - vl1r.v v16, (a7) # Unknown-size Folded Reload ld a7, 16(sp) # 8-byte Folded Reload .LBB0_433: # %vector.body504 # Parent Loop BB0_233 Depth=1 @@ -3459,9 +3443,8 @@ vluxei64.v v12, (zero), v12, v0.t vmsne.vi v9, v12, 0 vmand.mm v0, v0, v9 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v9, v16, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t vsetvli zero, zero, e64, m2, ta, ma vadd.vx v10, v10, a7 sub a5, a5, a7 @@ -3982,13 +3965,7 @@ slli a0, a0, 2 add a0, sp, a0 addi a0, a0, 608 - vl1r.v v8, (a0) # Unknown-size Folded Reload - csrr a0, vlenb - slli a1, a0, 2 - add a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 608 - vl2r.v v10, (a0) # Unknown-size Folded Reload + vl2r.v v8, (a0) # Unknown-size Folded Reload ld a0, 48(sp) # 8-byte Folded Reload ld a6, 32(sp) # 8-byte Folded Reload ld a7, 96(sp) # 8-byte Folded Reload @@ -4203,7 +4180,8 @@ sd zero, %pcrel_lo(.Lpcrel_hi70)(s10) sw zero, %pcrel_lo(.Lpcrel_hi71)(s11) csrr a0, vlenb - slli a0, a0, 3 + li a1, 6 + mul a0, a0, a1 add sp, sp, a0 ld ra, 712(sp) # 8-byte Folded Reload ld s0, 704(sp) # 8-byte Folded Reload --- build.a/External/SPEC/CINT2017speed/641.leela_s/CMakeFiles/641.leela_s.dir/root/cpu2017/benchspec/CPU/541.leela_r/src/FastBoard.s 2024-04-01 12:41:00.978404973 +0000 +++ build.b/External/SPEC/CINT2017speed/641.leela_s/CMakeFiles/641.leela_s.dir/root/cpu2017/benchspec/CPU/541.leela_r/src/FastBoard.s 2024-04-01 12:41:13.082067429 +0000 @@ -2177,22 +2177,21 @@ .LBB20_3: # %vector.ph srli a7, a5, 3 slli t0, a7, 2 + slli a7, a7, 31 vsetvli t1, zero, e32, m1, ta, ma vmv.v.i v8, 0 vsetvli zero, zero, e32, m1, tu, ma vmv.v.i v9, 0 vmv.s.x v9, a4 vsetvli a4, zero, e32, m2, ta, ma - vmv.v.i v12, 0 - slli a7, a7, 31 vmv.v.i v10, 0 sub a4, a7, t0 - vmv1r.v v10, v9 + vmv.v.i v12, 0 vsetvli zero, zero, e32, m2, tu, ma vmv.s.x v8, a3 - vmv.v.i v14, 0 and a7, a4, a2 - vmv1r.v v14, v8 + vmv1r.v v12, v9 + vmv1r.v v10, v8 li a3, 112 li a4, 64 mv t0, a7 @@ -2201,28 +2200,26 @@ # =>This Inner Loop Header: Depth=1 vl1re16.v v8, (t1) vsetvli zero, zero, e16, m1, ta, ma - vwaddu.vv v16, v8, v8 - vluxei32.v v9, (a1), v16 - vand.vx v16, v9, a3 - vmseq.vx v8, v16, a4 - vmsne.vx v16, v16, a4 + vwaddu.vv v14, v8, v8 + vluxei32.v v9, (a1), v14 + vand.vx v8, v9, a3 + vmsne.vx v8, v8, a4 vand.vi v9, v9, 7 vmseq.vi v9, v9, 4 - vmand.mm v0, v16, v9 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v16, v12, 1, v0 - vadd.vv v14, v14, v16 + vmand.mm v0, v8, v9 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v10, v10, 1, v0.t + vadd.vi v14, v12, 1 vmv1r.v v0, v8 - vmerge.vim v8, v12, 1, v0 - vadd.vv v10, v10, v8 + vmerge.vvm v12, v14, v12, v0 sub t0, t0, a6 add t1, t1, a5 bnez t0, .LBB20_4 # %bb.5: # %middle.block vmv.s.x v8, zero - vredsum.vs v9, v10, v8 + vredsum.vs v9, v12, v8 vmv.x.s a4, v9 - vredsum.vs v8, v14, v8 + vredsum.vs v8, v10, v8 vmv.x.s a3, v8 beq a7, a2, .LBB20_10 .LBB20_6: # %for.body.preheader --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/render/intern/source/rayshade.s 2024-04-01 12:40:59.098457402 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/render/intern/source/rayshade.s 2024-04-01 12:41:11.110122421 +0000 @@ -236,61 +236,60 @@ addiw a2, a2, -8 and a2, a0, a2 vsetivli zero, 4, e32, m1, ta, ma - vid.v v8 + vid.v v10 vmv.v.i v9, 0 li a4, 255 li a5, 57 li a6, 48 li a7, 128 - vmv.v.i v10, 0 vmv.v.i v11, 0 .LBB2_13: # %vector.body101 # =>This Inner Loop Header: Depth=1 - vadd.vi v12, v8, 4 + vsetvli zero, zero, e32, m1, ta, ma + vadd.vi v8, v10, 4 srliw t0, a3, 8 slli t0, t0, 6 add t0, a1, t0 ld t0, 0(t0) - vand.vx v13, v8, a4 - vand.vx v12, v12, a4 + vand.vx v12, v10, a4 + vand.vx v8, v8, a4 vsetvli zero, zero, e64, m2, ta, ma - vzext.vf2 v14, v13 - vzext.vf2 v16, v12 - vsll.vi v12, v14, 6 - vadd.vx v12, v12, t0 - vsll.vi v14, v16, 6 + vzext.vf2 v14, v12 + vzext.vf2 v12, v8 + vsll.vi v14, v14, 6 vadd.vx v14, v14, t0 + vsll.vi v12, v12, 6 + vadd.vx v12, v12, t0 vsetvli zero, zero, e8, mf4, ta, ma + vluxei64.v v8, (a5), v14 vluxei64.v v16, (a5), v12 - vluxei64.v v17, (a5), v14 + vmsle.vi v8, v8, -1 vmsle.vi v16, v16, -1 - vmsle.vi v17, v17, -1 vsetvli zero, zero, e64, m2, ta, ma - vluxei64.v v12, (a6), v12 vluxei64.v v14, (a6), v14 + vluxei64.v v12, (a6), v12 vsetvli zero, zero, e16, mf2, ta, ma - vluxei64.v v18, (a7), v12 - vluxei64.v v12, (a7), v14 - vmseq.vi v13, v18, 2 - vmseq.vi v14, v12, 2 - vmsne.vi v15, v18, 3 - vmsne.vi v12, v12, 3 - vmandn.mm v13, v13, v16 - vmand.mm v15, v15, v16 - vmor.mm v0, v15, v13 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v13, v9, 1, v0 - vmandn.mm v14, v14, v17 - vmand.mm v12, v12, v17 - vmor.mm v0, v12, v14 - vadd.vv v10, v10, v13 - vmerge.vim v12, v9, 1, v0 - vadd.vv v11, v11, v12 + vluxei64.v v17, (a7), v14 + vluxei64.v v14, (a7), v12 + vmseq.vi v12, v17, 2 + vmseq.vi v13, v14, 2 + vmsne.vi v15, v17, 3 + vmsne.vi v14, v14, 3 + vmandn.mm v12, v12, v8 + vmand.mm v8, v15, v8 + vmor.mm v0, v8, v12 + vsetvli zero, zero, e32, m1, ta, mu + vmandn.mm v8, v13, v16 + vmand.mm v12, v14, v16 + vmor.mm v8, v12, v8 + vadd.vi v9, v9, 1, v0.t + vmv.v.v v0, v8 + vadd.vi v11, v11, 1, v0.t addiw a3, a3, 8 - vadd.vi v8, v8, 8 + vadd.vi v10, v10, 8 bne a2, a3, .LBB2_13 # %bb.14: # %middle.block93 - vadd.vv v8, v11, v10 + vadd.vv v8, v11, v9 vmv.s.x v9, zero vredsum.vs v8, v8, v9 vmv.x.s s3, v8 @@ -331,45 +330,43 @@ addiw a2, a2, -8 and a2, a0, a2 vsetivli zero, 4, e32, m1, ta, ma - vid.v v9 - vmv.v.i v10, 0 + vid.v v10 + vmv.v.i v9, 0 li a4, 255 li a5, 128 vmv.v.i v11, 0 - vmv.v.i v12, 0 .LBB2_21: # %vector.body # =>This Inner Loop Header: Depth=1 - vadd.vi v8, v9, 4 + vsetvli zero, zero, e32, m1, ta, ma + vadd.vi v8, v10, 4 srliw a6, a3, 8 slli a6, a6, 6 add a6, a1, a6 ld a6, 0(a6) - vand.vx v13, v9, a4 + vand.vx v12, v10, a4 vand.vx v8, v8, a4 vsetvli zero, zero, e64, m2, ta, ma - vzext.vf2 v14, v13 - vzext.vf2 v16, v8 + vzext.vf2 v14, v12 + vzext.vf2 v12, v8 vsll.vi v14, v14, 6 - vsll.vi v16, v16, 6 + vsll.vi v12, v12, 6 addi a6, a6, 48 vluxei64.v v14, (a6), v14 - vluxei64.v v16, (a6), v16 + vluxei64.v v12, (a6), v12 vsetvli zero, zero, e16, mf2, ta, ma vluxei64.v v8, (a5), v14 - vluxei64.v v13, (a5), v16 + vluxei64.v v14, (a5), v12 vmsne.vi v0, v8, 3 - vmsne.vi v8, v13, 3 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v13, v10, 1, v0 + vmsne.vi v8, v14, 3 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v9, v9, 1, v0.t vmv1r.v v0, v8 - vmerge.vim v8, v10, 1, v0 - vadd.vv v11, v11, v13 - vadd.vv v12, v12, v8 + vadd.vi v11, v11, 1, v0.t addiw a3, a3, 8 - vadd.vi v9, v9, 8 + vadd.vi v10, v10, 8 bne a2, a3, .LBB2_21 # %bb.22: # %middle.block - vadd.vv v8, v12, v11 + vadd.vv v8, v11, v9 vmv.s.x v9, zero vredsum.vs v8, v8, v9 vmv.x.s s3, v8 @@ -866,12 +863,11 @@ vmsne.vi v17, v17, 3 vmsne.vi v18, v18, 3 vmand.mm v0, v9, v17 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v9, v11, 1, v0 - vmand.mm v0, v10, v18 - vadd.vv v13, v13, v9 - vmerge.vim v9, v11, 1, v0 - vadd.vv v15, v15, v9 + vsetvli zero, zero, e32, m1, ta, mu + vmand.mm v9, v10, v18 + vadd.vi v13, v13, 1, v0.t + vmv.v.v v0, v9 + vadd.vi v15, v15, 1, v0.t addiw s4, s4, 8 vadd.vi v16, v16, 8 bne s2, s4, .LBB5_21 --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-ssa-loop-prefetch.s 2024-04-01 12:40:59.682441116 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-ssa-loop-prefetch.s 2024-04-01 12:41:11.726105243 +0000 @@ -302,13 +302,14 @@ beqz a3, .LBB0_27 # %bb.29: # %for.body.lr.ph sd a3, 240(sp) # 8-byte Folded Spill - sd zero, 160(sp) # 8-byte Folded Spill + li s5, 0 li a0, -1 srli s8, a0, 32 csrr s11, vlenb srli s0, s11, 1 - slli s5, s11, 1 - srli s7, s11, 2 + slli s7, s11, 1 + srli a0, s11, 2 + sd a0, 64(sp) # 8-byte Folded Spill addi s9, s2, 8 lui a0, 16 addiw a0, a0, -1 @@ -316,7 +317,7 @@ li s6, 3 lui a0, 1 addiw a0, a0, -1140 - sd a0, 120(sp) # 8-byte Folded Spill + sd a0, 128(sp) # 8-byte Folded Spill vsetvli a0, zero, e8, mf4, ta, ma vmv.v.i v8, 1 addi a0, sp, 368 @@ -342,7 +343,7 @@ add a0, sp, a0 addi a0, a0, 368 vs2r.v v8, (a0) # Unknown-size Folded Spill - sd s3, 152(sp) # 8-byte Folded Spill + sd s3, 160(sp) # 8-byte Folded Spill sd s8, 216(sp) # 8-byte Folded Spill ld a0, 0(s3) beqz a0, .LBB0_32 @@ -362,18 +363,18 @@ call fprintf .LBB0_32: # %if.end56 # =>This Loop Header: Depth=1 - # Child Loop BB0_41 Depth 2 - # Child Loop BB0_49 Depth 3 - # Child Loop BB0_71 Depth 2 - # Child Loop BB0_73 Depth 3 - # Child Loop BB0_82 Depth 4 - # Child Loop BB0_92 Depth 5 - # Child Loop BB0_100 Depth 5 - # Child Loop BB0_103 Depth 6 - # Child Loop BB0_106 Depth 6 - # Child Loop BB0_131 Depth 5 - # Child Loop BB0_134 Depth 6 - # Child Loop BB0_137 Depth 6 + # Child Loop BB0_40 Depth 2 + # Child Loop BB0_48 Depth 3 + # Child Loop BB0_70 Depth 2 + # Child Loop BB0_72 Depth 3 + # Child Loop BB0_81 Depth 4 + # Child Loop BB0_91 Depth 5 + # Child Loop BB0_99 Depth 5 + # Child Loop BB0_102 Depth 6 + # Child Loop BB0_105 Depth 6 + # Child Loop BB0_130 Depth 5 + # Child Loop BB0_133 Depth 6 + # Child Loop BB0_136 Depth 6 # Child Loop BB0_159 Depth 2 # Child Loop BB0_161 Depth 3 # Child Loop BB0_191 Depth 2 @@ -410,7 +411,7 @@ ld s1, 240(sp) # 8-byte Folded Reload mv a0, s1 call optimize_loop_nest_for_size_p - beqz a0, .LBB0_37 + beqz a0, .LBB0_36 # %bb.33: # %if.then.i # in Loop: Header=BB0_32 Depth=1 ld a3, 0(s3) @@ -423,83 +424,74 @@ ld a0, %pcrel_lo(.Lpcrel_hi22)(a0) lbu a0, 0(a0) andi a0, a0, 8 - beqz a0, .LBB0_36 -# %bb.35: # %if.then3.i - # in Loop: Header=BB0_32 Depth=1 -.Lpcrel_hi23: - auipc a0, %pcrel_hi(.L.str.15) - addi a0, a0, %pcrel_lo(.Lpcrel_hi23) - li a1, 22 - li a2, 1 - call fwrite -.LBB0_36: # in Loop: Header=BB0_32 Depth=1 + bnez a0, .LBB0_156 +# %bb.35: # in Loop: Header=BB0_32 Depth=1 li s1, 0 - ld a0, 160(sp) # 8-byte Folded Reload j .LBB0_381 -.LBB0_37: # %if.end5.i +.LBB0_36: # %if.end5.i # in Loop: Header=BB0_32 Depth=1 - sd s4, 136(sp) # 8-byte Folded Spill + sd s4, 144(sp) # 8-byte Folded Spill mv a0, s1 call get_loop_body_in_dom_order lwu a1, 36(s1) mv s4, a0 sd zero, 336(sp) beqz a1, .LBB0_177 -# %bb.38: # %for.body.i.i.preheader +# %bb.37: # %for.body.i.i.preheader # in Loop: Header=BB0_32 Depth=1 - sd s7, 64(sp) # 8-byte Folded Spill - sd s5, 56(sp) # 8-byte Folded Spill + sd s7, 56(sp) # 8-byte Folded Spill + sd s5, 72(sp) # 8-byte Folded Spill sd zero, 184(sp) # 8-byte Folded Spill li s7, 0 li s5, 1 ld a3, 232(sp) # 8-byte Folded Reload ld s1, 240(sp) # 8-byte Folded Reload - sd s2, 96(sp) # 8-byte Folded Spill - sd s9, 80(sp) # 8-byte Folded Spill - j .LBB0_41 -.LBB0_39: # %for.inc46.loopexit.i.i - # in Loop: Header=BB0_41 Depth=2 + sd s2, 104(sp) # 8-byte Folded Spill + sd s9, 88(sp) # 8-byte Folded Spill + j .LBB0_40 +.LBB0_38: # %for.inc46.loopexit.i.i + # in Loop: Header=BB0_40 Depth=2 lwu a1, 36(s1) - ld s3, 152(sp) # 8-byte Folded Reload - ld s2, 96(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload + ld s2, 104(sp) # 8-byte Folded Reload ld s8, 216(sp) # 8-byte Folded Reload - ld s9, 80(sp) # 8-byte Folded Reload -.LBB0_40: # %for.inc46.i.i - # in Loop: Header=BB0_41 Depth=2 + ld s9, 88(sp) # 8-byte Folded Reload +.LBB0_39: # %for.inc46.i.i + # in Loop: Header=BB0_40 Depth=2 addi s7, s7, 1 - bgeu s7, a1, .LBB0_68 -.LBB0_41: # %for.body.i.i + bgeu s7, a1, .LBB0_67 +.LBB0_40: # %for.body.i.i # Parent Loop BB0_32 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_49 Depth 3 + # Child Loop BB0_48 Depth 3 slli a0, s7, 3 add a0, s4, a0 ld a0, 0(a0) ld a2, 24(a0) - bne a2, s1, .LBB0_40 -# %bb.42: # %if.end.i.i26 - # in Loop: Header=BB0_41 Depth=2 + bne a2, s1, .LBB0_39 +# %bb.41: # %if.end.i.i26 + # in Loop: Header=BB0_40 Depth=2 lbu a2, 97(a0) andi a2, a2, 2 - bnez a2, .LBB0_40 -# %bb.43: # %land.lhs.true.i.i.i.i - # in Loop: Header=BB0_41 Depth=2 + bnez a2, .LBB0_39 +# %bb.42: # %land.lhs.true.i.i.i.i + # in Loop: Header=BB0_40 Depth=2 ld a0, 64(a0) - beqz a0, .LBB0_40 -# %bb.44: # %bb_seq.exit.i.i.i - # in Loop: Header=BB0_41 Depth=2 + beqz a0, .LBB0_39 +# %bb.43: # %bb_seq.exit.i.i.i + # in Loop: Header=BB0_40 Depth=2 ld a0, 0(a0) - beqz a0, .LBB0_40 -# %bb.45: # %gsi_start_bb.exit.i.i - # in Loop: Header=BB0_41 Depth=2 + beqz a0, .LBB0_39 +# %bb.44: # %gsi_start_bb.exit.i.i + # in Loop: Header=BB0_40 Depth=2 ld s3, 0(a0) - beqz s3, .LBB0_67 -# %bb.46: # %for.body4.i.i.preheader - # in Loop: Header=BB0_41 Depth=2 + beqz s3, .LBB0_66 +# %bb.45: # %for.body4.i.i.preheader + # in Loop: Header=BB0_40 Depth=2 li s8, 6 - j .LBB0_49 -.LBB0_47: # %if.then38.i.i - # in Loop: Header=BB0_49 Depth=3 + j .LBB0_48 +.LBB0_46: # %if.then38.i.i + # in Loop: Header=BB0_48 Depth=3 addi a1, sp, 336 li a3, 1 ld a0, 240(sp) # 8-byte Folded Reload @@ -512,19 +504,19 @@ ld a0, 184(sp) # 8-byte Folded Reload addiw a0, a0, 1 sd a0, 184(sp) # 8-byte Folded Spill -.LBB0_48: # %for.inc.i.i - # in Loop: Header=BB0_49 Depth=3 +.LBB0_47: # %for.inc.i.i + # in Loop: Header=BB0_48 Depth=3 ld s3, 16(s3) - beqz s3, .LBB0_39 -.LBB0_49: # %for.body4.i.i + beqz s3, .LBB0_38 +.LBB0_48: # %for.body4.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_41 Depth=2 + # Parent Loop BB0_40 Depth=2 # => This Inner Loop Header: Depth=3 ld s2, 0(s3) lbu a0, 0(s2) - bne a0, s8, .LBB0_52 -# %bb.50: # %if.then.i.i.i.i - # in Loop: Header=BB0_49 Depth=3 + bne a0, s8, .LBB0_51 +# %bb.49: # %if.then.i.i.i.i + # in Loop: Header=BB0_48 Depth=3 mv s9, s5 .Lpcrel_hi24: auipc a0, %got_pcrel_hi(gss_for_code_) @@ -536,39 +528,39 @@ slli a0, a0, 3 add a0, s8, a0 ld a0, 0(a0) - beqz a0, .LBB0_57 -# %bb.51: # %if.then.i.i31.i.thread.i - # in Loop: Header=BB0_49 Depth=3 + beqz a0, .LBB0_56 +# %bb.50: # %if.then.i.i31.i.thread.i + # in Loop: Header=BB0_48 Depth=3 add a1, s2, a0 ld s5, 0(a1) - j .LBB0_62 -.LBB0_52: # %if.then9.i.i - # in Loop: Header=BB0_49 Depth=3 + j .LBB0_61 +.LBB0_51: # %if.then9.i.i + # in Loop: Header=BB0_48 Depth=3 andi a1, a0, 254 addi a1, a1, -10 li a2, -4 - bltu a1, a2, .LBB0_54 -# %bb.53: # %gimple_vuse.exit.i.i - # in Loop: Header=BB0_49 Depth=3 + bltu a1, a2, .LBB0_53 +# %bb.52: # %gimple_vuse.exit.i.i + # in Loop: Header=BB0_48 Depth=3 ld a1, 56(s2) - bnez a1, .LBB0_56 -.LBB0_54: # %lor.lhs.false.i.i - # in Loop: Header=BB0_49 Depth=3 + bnez a1, .LBB0_55 +.LBB0_53: # %lor.lhs.false.i.i + # in Loop: Header=BB0_48 Depth=3 li a1, 8 - bne a0, a1, .LBB0_48 -# %bb.55: # %land.lhs.true.i.i - # in Loop: Header=BB0_49 Depth=3 + bne a0, a1, .LBB0_47 +# %bb.54: # %land.lhs.true.i.i + # in Loop: Header=BB0_48 Depth=3 mv a0, s2 call gimple_call_flags ld a3, 232(sp) # 8-byte Folded Reload andi a0, a0, 1 - bnez a0, .LBB0_48 -.LBB0_56: # %if.then16.i.i - # in Loop: Header=BB0_49 Depth=3 + bnez a0, .LBB0_47 +.LBB0_55: # %if.then16.i.i + # in Loop: Header=BB0_48 Depth=3 li s5, 0 - j .LBB0_48 -.LBB0_57: # %gimple_assign_lhs.exit.i.i - # in Loop: Header=BB0_49 Depth=3 + j .LBB0_47 +.LBB0_56: # %gimple_assign_lhs.exit.i.i + # in Loop: Header=BB0_48 Depth=3 .Lpcrel_hi26: auipc a0, %pcrel_hi(.L.str.17) addi a0, a0, %pcrel_lo(.Lpcrel_hi26) @@ -581,23 +573,23 @@ ld s5, 0(s2) addi a1, a0, -10 li a2, -9 - bgeu a1, a2, .LBB0_59 -# %bb.58: # in Loop: Header=BB0_49 Depth=3 + bgeu a1, a2, .LBB0_58 +# %bb.57: # in Loop: Header=BB0_48 Depth=3 li a2, 0 ld a3, 232(sp) # 8-byte Folded Reload li s8, 6 - j .LBB0_63 -.LBB0_59: # %if.then.i.i31.i.i - # in Loop: Header=BB0_49 Depth=3 + j .LBB0_62 +.LBB0_58: # %if.then.i.i31.i.i + # in Loop: Header=BB0_48 Depth=3 slli a0, a0, 2 add a0, s1, a0 lwu a0, 0(a0) slli a0, a0, 3 add a0, s8, a0 ld a0, 0(a0) - bnez a0, .LBB0_61 -# %bb.60: # %cond.true.i.i.i40.i.i - # in Loop: Header=BB0_49 Depth=3 + bnez a0, .LBB0_60 +# %bb.59: # %cond.true.i.i.i40.i.i + # in Loop: Header=BB0_48 Depth=3 .Lpcrel_hi28: auipc a0, %pcrel_hi(.L.str.17) addi a0, a0, %pcrel_lo(.Lpcrel_hi28) @@ -607,16 +599,16 @@ li a1, 1622 call fancy_abort li a0, 0 -.LBB0_61: # %gimple_ops.exit.i.i37.i.i - # in Loop: Header=BB0_49 Depth=3 +.LBB0_60: # %gimple_ops.exit.i.i37.i.i + # in Loop: Header=BB0_48 Depth=3 ld a3, 232(sp) # 8-byte Folded Reload -.LBB0_62: # %gimple_ops.exit.i.i37.i.i - # in Loop: Header=BB0_49 Depth=3 +.LBB0_61: # %gimple_ops.exit.i.i37.i.i + # in Loop: Header=BB0_48 Depth=3 li s8, 6 add a0, s2, a0 ld a2, 8(a0) -.LBB0_63: # %gimple_assign_rhs1.exit.i.i - # in Loop: Header=BB0_49 Depth=3 +.LBB0_62: # %gimple_assign_rhs1.exit.i.i + # in Loop: Header=BB0_48 Depth=3 lwu a0, 0(a2) .Lpcrel_hi30: auipc a1, %got_pcrel_hi(tree_code_type) @@ -626,9 +618,9 @@ add a0, s1, a0 lw a0, 0(a0) li a1, 4 - bne a0, a1, .LBB0_65 -# %bb.64: # %if.then25.i.i - # in Loop: Header=BB0_49 Depth=3 + bne a0, a1, .LBB0_64 +# %bb.63: # %if.then25.i.i + # in Loop: Header=BB0_48 Depth=3 addi a1, sp, 336 ld a0, 240(sp) # 8-byte Folded Reload li a3, 0 @@ -639,33 +631,32 @@ ld a0, 184(sp) # 8-byte Folded Reload addiw a0, a0, 1 sd a0, 184(sp) # 8-byte Folded Spill -.LBB0_65: # %if.end31.i.i - # in Loop: Header=BB0_49 Depth=3 +.LBB0_64: # %if.end31.i.i + # in Loop: Header=BB0_48 Depth=3 lwu a0, 0(s5) and a0, a0, a3 slli a0, a0, 2 add a0, s1, a0 lw a0, 0(a0) li a1, 4 - beq a0, a1, .LBB0_47 -# %bb.66: # in Loop: Header=BB0_49 Depth=3 + beq a0, a1, .LBB0_46 +# %bb.65: # in Loop: Header=BB0_48 Depth=3 ld s1, 240(sp) # 8-byte Folded Reload mv s5, s9 - j .LBB0_48 -.LBB0_67: # in Loop: Header=BB0_41 Depth=2 - ld s3, 152(sp) # 8-byte Folded Reload - j .LBB0_40 -.LBB0_68: # %gather_memory_references.exit.i + j .LBB0_47 +.LBB0_66: # in Loop: Header=BB0_40 Depth=2 + ld s3, 160(sp) # 8-byte Folded Reload + j .LBB0_39 +.LBB0_67: # %gather_memory_references.exit.i # in Loop: Header=BB0_32 Depth=1 - ld s1, 336(sp) + sd s5, 136(sp) # 8-byte Folded Spill + ld s5, 336(sp) mv a0, s4 call free - beqz s1, .LBB0_178 -# %bb.69: # %for.body.i32.i.preheader + beqz s5, .LBB0_178 +# %bb.68: # %for.body.i32.i.preheader # in Loop: Header=BB0_32 Depth=1 - sd s5, 128(sp) # 8-byte Folded Spill - sd s1, 72(sp) # 8-byte Folded Spill - mv s2, s1 + sd s5, 80(sp) # 8-byte Folded Spill csrr a0, vlenb slli a0, a0, 2 add a0, sp, a0 @@ -681,170 +672,170 @@ add a0, sp, a0 addi a0, a0, 368 vl2r.v v28, (a0) # Unknown-size Folded Reload - j .LBB0_71 -.LBB0_70: # %prune_group_by_reuse.exit.i.i - # in Loop: Header=BB0_71 Depth=2 - ld s2, 24(s2) - beqz s2, .LBB0_157 -.LBB0_71: # %for.body.i32.i + j .LBB0_70 +.LBB0_69: # %prune_group_by_reuse.exit.i.i + # in Loop: Header=BB0_70 Depth=2 + ld s5, 24(s5) + beqz s5, .LBB0_157 +.LBB0_70: # %for.body.i32.i # Parent Loop BB0_32 Depth=1 # => This Loop Header: Depth=2 - # Child Loop BB0_73 Depth 3 - # Child Loop BB0_82 Depth 4 - # Child Loop BB0_92 Depth 5 - # Child Loop BB0_100 Depth 5 - # Child Loop BB0_103 Depth 6 - # Child Loop BB0_106 Depth 6 - # Child Loop BB0_131 Depth 5 - # Child Loop BB0_134 Depth 6 - # Child Loop BB0_137 Depth 6 - ld s5, 16(s2) - mv s4, s5 - bnez s5, .LBB0_73 - j .LBB0_70 -.LBB0_72: # %for.bodythread-pre-split.i.i.i - # in Loop: Header=BB0_73 Depth=3 - ld s5, 16(s2) -.LBB0_73: # %for.body.i.i.i + # Child Loop BB0_72 Depth 3 + # Child Loop BB0_81 Depth 4 + # Child Loop BB0_91 Depth 5 + # Child Loop BB0_99 Depth 5 + # Child Loop BB0_102 Depth 6 + # Child Loop BB0_105 Depth 6 + # Child Loop BB0_130 Depth 5 + # Child Loop BB0_133 Depth 6 + # Child Loop BB0_136 Depth 6 + ld s2, 16(s5) + mv s4, s2 + bnez s2, .LBB0_72 + j .LBB0_69 +.LBB0_71: # %for.bodythread-pre-split.i.i.i + # in Loop: Header=BB0_72 Depth=3 + ld s2, 16(s5) +.LBB0_72: # %for.body.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 + # Parent Loop BB0_70 Depth=2 # => This Loop Header: Depth=3 - # Child Loop BB0_82 Depth 4 - # Child Loop BB0_92 Depth 5 - # Child Loop BB0_100 Depth 5 - # Child Loop BB0_103 Depth 6 - # Child Loop BB0_106 Depth 6 - # Child Loop BB0_131 Depth 5 - # Child Loop BB0_134 Depth 6 - # Child Loop BB0_137 Depth 6 + # Child Loop BB0_81 Depth 4 + # Child Loop BB0_91 Depth 5 + # Child Loop BB0_99 Depth 5 + # Child Loop BB0_102 Depth 6 + # Child Loop BB0_105 Depth 6 + # Child Loop BB0_130 Depth 5 + # Child Loop BB0_133 Depth 6 + # Child Loop BB0_136 Depth 6 ld a0, 24(s4) ld a0, 8(a0) - beqz a0, .LBB0_76 -# %bb.74: # %if.end.i.i.i.i.i - # in Loop: Header=BB0_73 Depth=3 + beqz a0, .LBB0_75 +# %bb.73: # %if.end.i.i.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a1, 0(s10) addi a1, a1, 2047 lw a1, 1161(a1) srai a2, a0, 63 xor a0, a0, a2 sub a0, a0, a2 - blt a1, a0, .LBB0_78 -# %bb.75: # %if.end11.i.i.i.i.i - # in Loop: Header=BB0_73 Depth=3 + blt a1, a0, .LBB0_77 +# %bb.74: # %if.end11.i.i.i.i.i + # in Loop: Header=BB0_72 Depth=3 divuw a0, a1, a0 slli a0, a0, 32 srli a0, a0, 32 li a1, 32 - j .LBB0_77 -.LBB0_76: # in Loop: Header=BB0_73 Depth=3 + j .LBB0_76 +.LBB0_75: # in Loop: Header=BB0_72 Depth=3 li a0, 1 li a1, 40 -.LBB0_77: # %cleanup.sink.split.i.i.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_76: # %cleanup.sink.split.i.i.i.i.i + # in Loop: Header=BB0_72 Depth=3 add a1, s4, a1 sd a0, 0(a1) -.LBB0_78: # %prune_ref_by_self_reuse.exit.i.i.i.i - # in Loop: Header=BB0_73 Depth=3 - beqz s5, .LBB0_140 -# %bb.79: # %for.body.lr.ph.i.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_77: # %prune_ref_by_self_reuse.exit.i.i.i.i + # in Loop: Header=BB0_72 Depth=3 + beqz s2, .LBB0_139 +# %bb.78: # %for.body.lr.ph.i.i.i.i + # in Loop: Header=BB0_72 Depth=3 li s7, 1 - j .LBB0_82 -.LBB0_80: # in Loop: Header=BB0_82 Depth=4 + j .LBB0_81 +.LBB0_79: # in Loop: Header=BB0_81 Depth=4 li a0, 0 -.LBB0_81: # %for.inc.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - ld s5, 56(s5) +.LBB0_80: # %for.inc.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + ld s2, 56(s2) mv s7, a0 - beqz s5, .LBB0_140 -.LBB0_82: # %for.body.i.i.i.i + beqz s2, .LBB0_139 +.LBB0_81: # %for.body.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 # => This Loop Header: Depth=4 - # Child Loop BB0_92 Depth 5 - # Child Loop BB0_100 Depth 5 - # Child Loop BB0_103 Depth 6 - # Child Loop BB0_106 Depth 6 - # Child Loop BB0_131 Depth 5 - # Child Loop BB0_134 Depth 6 - # Child Loop BB0_137 Depth 6 - beq s5, s4, .LBB0_80 -# %bb.83: # %if.end.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + # Child Loop BB0_91 Depth 5 + # Child Loop BB0_99 Depth 5 + # Child Loop BB0_102 Depth 6 + # Child Loop BB0_105 Depth 6 + # Child Loop BB0_130 Depth 5 + # Child Loop BB0_133 Depth 6 + # Child Loop BB0_136 Depth 6 + beq s2, s4, .LBB0_79 +# %bb.82: # %if.end.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 lbu a0, 64(s4) andi a0, a0, 1 - bnez a0, .LBB0_85 -# %bb.84: # %land.lhs.true.i.i.i36.i - # in Loop: Header=BB0_82 Depth=4 - lbu a0, 64(s5) + bnez a0, .LBB0_84 +# %bb.83: # %land.lhs.true.i.i.i36.i + # in Loop: Header=BB0_81 Depth=4 + lbu a0, 64(s2) andi a0, a0, 1 - bnez a0, .LBB0_109 -.LBB0_85: # %if.end8.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bnez a0, .LBB0_108 +.LBB0_84: # %if.end8.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a2, 16(s4) - ld a3, 16(s5) + ld a3, 16(s2) sub a4, a3, a2 - beqz a4, .LBB0_107 -# %bb.86: # %if.end9.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + beqz a4, .LBB0_106 +# %bb.85: # %if.end9.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a0, 24(s4) ld a0, 8(a0) - beqz a0, .LBB0_110 -# %bb.87: # %if.end27.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - bltz a0, .LBB0_115 -# %bb.88: # %if.else.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - bltz a4, .LBB0_109 -# %bb.89: # %if.else.if.end50_crit_edge.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + beqz a0, .LBB0_109 +# %bb.86: # %if.end27.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + bltz a0, .LBB0_114 +# %bb.87: # %if.else.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + bltz a4, .LBB0_108 +# %bb.88: # %if.else.if.end50_crit_edge.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a1, 0(s10) addi a1, a1, 2047 lw a1, 1161(a1) - bge a1, a0, .LBB0_117 -.LBB0_90: # %while.cond.preheader.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bge a1, a0, .LBB0_116 +.LBB0_89: # %while.cond.preheader.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 mv a3, a1 li a2, 2 - bltu a1, a2, .LBB0_94 -# %bb.91: # %while.cond.preheader.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu a1, a2, .LBB0_93 +# %bb.90: # %while.cond.preheader.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 andi a5, a0, 1 mv a3, a1 mv a2, a0 - bnez a5, .LBB0_94 -.LBB0_92: # %while.body.i.i.i.i.i + bnez a5, .LBB0_93 +.LBB0_91: # %while.body.i.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 # => This Inner Loop Header: Depth=5 mv a5, a3 andi a6, a2, 2 srli a3, a3, 1 - bnez a6, .LBB0_94 -# %bb.93: # %while.body.i.i.i.i.i - # in Loop: Header=BB0_92 Depth=5 + bnez a6, .LBB0_93 +# %bb.92: # %while.body.i.i.i.i.i + # in Loop: Header=BB0_91 Depth=5 srai a2, a2, 1 - bltu s6, a5, .LBB0_92 -.LBB0_94: # %while.end.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu s6, a5, .LBB0_91 +.LBB0_93: # %while.end.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 div a2, a4, a0 - beqz a1, .LBB0_118 -# %bb.95: # %for.cond2.preheader.lr.ph.i.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - beqz a3, .LBB0_118 -# %bb.96: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader - # in Loop: Header=BB0_82 Depth=4 + beqz a1, .LBB0_117 +# %bb.94: # %for.cond2.preheader.lr.ph.i.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + beqz a3, .LBB0_117 +# %bb.95: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader + # in Loop: Header=BB0_81 Depth=4 li a5, 8 mv a7, s0 - bltu a5, s0, .LBB0_98 -# %bb.97: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader - # in Loop: Header=BB0_82 Depth=4 + bltu a5, s0, .LBB0_97 +# %bb.96: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader + # in Loop: Header=BB0_81 Depth=4 li a7, 8 -.LBB0_98: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader - # in Loop: Header=BB0_82 Depth=4 +.LBB0_97: # %for.cond2.preheader.us.i.i.i.i.i.i.preheader + # in Loop: Header=BB0_81 Depth=4 ld a5, 8(s4) ld a5, 16(a5) li t1, 0 @@ -861,32 +852,33 @@ snez a4, a4 or a4, a6, a4 srli a5, a5, 3 - j .LBB0_100 -.LBB0_99: # %for.cond2.for.inc22_crit_edge.us.i.i.i.i.i.i - # in Loop: Header=BB0_100 Depth=5 + j .LBB0_99 +.LBB0_98: # %for.cond2.for.inc22_crit_edge.us.i.i.i.i.i.i + # in Loop: Header=BB0_99 Depth=5 add t1, t1, a5 slli t4, t1, 32 srli t4, t4, 32 - bgeu t4, a1, .LBB0_125 -.LBB0_100: # %for.cond2.preheader.us.i.i.i.i.i.i + bgeu t4, a1, .LBB0_124 +.LBB0_99: # %for.cond2.preheader.us.i.i.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 # => This Loop Header: Depth=5 - # Child Loop BB0_103 Depth 6 - # Child Loop BB0_106 Depth 6 + # Child Loop BB0_102 Depth 6 + # Child Loop BB0_105 Depth 6 sext.w a6, a3 sltu t4, a6, a7 or t4, t4, a4 - beqz t4, .LBB0_102 -# %bb.101: # in Loop: Header=BB0_100 Depth=5 + beqz t4, .LBB0_101 +# %bb.100: # in Loop: Header=BB0_99 Depth=5 li t4, 0 - j .LBB0_105 -.LBB0_102: # %vector.ph139 - # in Loop: Header=BB0_100 Depth=5 + j .LBB0_104 +.LBB0_101: # %vector.ph139 + # in Loop: Header=BB0_99 Depth=5 srli t5, s11, 1 neg t4, t5 + and t4, t4, a3 vsetvli t6, zero, e32, m1, tu, ma vmv1r.v v12, v26 vmv.s.x v12, t2 @@ -894,62 +886,59 @@ vmv1r.v v13, v26 vmv.s.x v13, t3 vmv2r.v v10, v28 - and t4, t4, a3 vmv1r.v v8, v12 vmv1r.v v10, v13 - vsetvli t2, zero, e32, m2, ta, ma - vmv.v.x v12, t1 mv t2, t4 - vmv2r.v v14, v24 -.LBB0_103: # %vector.body144 + vmv2r.v v12, v24 +.LBB0_102: # %vector.body144 # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 - # Parent Loop BB0_100 Depth=5 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 + # Parent Loop BB0_99 Depth=5 # => This Inner Loop Header: Depth=6 - vmv2r.v v16, v14 - vmadd.vx v16, a0, v12 + vsetvli t3, zero, e32, m2, ta, ma + vmv.v.x v14, t1 + vmacc.vx v14, a0, v12 vsetvli zero, zero, e64, m4, ta, ma - vsext.vf2 v20, v16 + vsext.vf2 v16, v14 vsetvli zero, zero, e32, m2, ta, ma - vadd.vx v16, v16, t0 + vadd.vx v14, v14, t0 vsetvli zero, zero, e64, m4, ta, ma - vdivu.vx v20, v20, a1 + vdivu.vx v16, v16, a1 vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v18, v20, 0 + vnsrl.wi v20, v16, 0 vsetvli zero, zero, e64, m4, ta, ma - vsext.vf2 v20, v16 - vdivu.vx v20, v20, a1 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v16, v20, 0 - vmsne.vv v0, v18, v16 + vsext.vf2 v16, v14 + vdivu.vx v16, v16, a1 + vsetvli zero, zero, e32, m2, ta, mu + vnsrl.wi v14, v16, 0 + vmsne.vv v0, v20, v14 vadd.vi v8, v8, 1 - vmerge.vim v16, v28, 1, v0 - vadd.vv v10, v10, v16 + vadd.vi v10, v10, 1, v0.t subw t2, t2, s0 - vadd.vx v14, v14, t5 - bnez t2, .LBB0_103 -# %bb.104: # %middle.block136 - # in Loop: Header=BB0_100 Depth=5 + vadd.vx v12, v12, t5 + bnez t2, .LBB0_102 +# %bb.103: # %middle.block136 + # in Loop: Header=BB0_99 Depth=5 vmv.s.x v12, zero vredsum.vs v8, v8, v12 vmv.x.s t2, v8 vredsum.vs v8, v10, v12 sext.w t5, t4 vmv.x.s t3, v8 - beq t5, a6, .LBB0_99 -.LBB0_105: # %for.body6.us.i.i.i.i.i.i.preheader - # in Loop: Header=BB0_100 Depth=5 + beq t5, a6, .LBB0_98 +.LBB0_104: # %for.body6.us.i.i.i.i.i.i.preheader + # in Loop: Header=BB0_99 Depth=5 addi t5, t4, 1 mul t4, a0, t4 addw t4, t1, t4 -.LBB0_106: # %for.body6.us.i.i.i.i.i.i +.LBB0_105: # %for.body6.us.i.i.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 - # Parent Loop BB0_100 Depth=5 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 + # Parent Loop BB0_99 Depth=5 # => This Inner Loop Header: Depth=6 addw t6, t0, t4 divu s1, t4, a1 @@ -964,46 +953,46 @@ srli t6, t6, 32 addi t5, t5, 1 addw t4, t4, a0 - bltu t6, a3, .LBB0_106 - j .LBB0_99 -.LBB0_107: # %if.then.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu t6, a3, .LBB0_105 + j .LBB0_98 +.LBB0_106: # %if.then.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 li a2, 0 li a0, 0 - beqz s7, .LBB0_81 -.LBB0_108: # %for.inc.sink.split.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + beqz s7, .LBB0_80 +.LBB0_107: # %for.inc.sink.split.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 sd a2, 40(s4) -.LBB0_109: # %for.inc.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 +.LBB0_108: # %for.inc.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 mv a0, s7 - j .LBB0_81 -.LBB0_110: # %if.then11.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - beqz s7, .LBB0_80 -# %bb.111: # %if.end14.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + j .LBB0_80 +.LBB0_109: # %if.then11.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + beqz s7, .LBB0_79 +# %bb.110: # %if.end14.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a0, 0(s10) addi a0, a0, 2047 lw a0, 1161(a0) - beqz a0, .LBB0_119 + beqz a0, .LBB0_118 +# %bb.111: # %ddown.exit59.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + bltz a2, .LBB0_121 # %bb.112: # %ddown.exit59.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - bltz a2, .LBB0_122 -# %bb.113: # %ddown.exit59.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - bltz a3, .LBB0_123 -.LBB0_114: # %ddown.exit59.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + # in Loop: Header=BB0_81 Depth=4 + bltz a3, .LBB0_122 +.LBB0_113: # %ddown.exit59.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 divu a1, a2, a0 divu a0, a3, a0 - bne a1, a0, .LBB0_109 - j .LBB0_124 -.LBB0_115: # %if.then29.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 - bgtz a4, .LBB0_109 -# %bb.116: # %if.end33.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bne a1, a0, .LBB0_108 + j .LBB0_123 +.LBB0_114: # %if.then29.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 + bgtz a4, .LBB0_108 +# %bb.115: # %if.end33.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a1, 0(s10) addi a1, a1, 2047 lw a1, 1161(a1) @@ -1012,9 +1001,9 @@ addiw a5, a1, -1 sub a2, a5, a2 sub a3, a5, a3 - blt a1, a0, .LBB0_90 -.LBB0_117: # %ddown.exit67.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + blt a1, a0, .LBB0_89 +.LBB0_116: # %ddown.exit67.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 addi a4, a1, -1 srai a5, a3, 63 and a4, a5, a4 @@ -1026,15 +1015,15 @@ add a2, a2, a3 sub a2, a2, a1 div a2, a2, a0 - bltu a2, a4, .LBB0_108 - j .LBB0_109 -.LBB0_118: # in Loop: Header=BB0_82 Depth=4 + bltu a2, a4, .LBB0_107 + j .LBB0_108 +.LBB0_117: # in Loop: Header=BB0_81 Depth=4 divw a0, zero, zero li a1, 50 - blt a1, a0, .LBB0_109 - j .LBB0_139 -.LBB0_119: # %ddown.exit.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + blt a1, a0, .LBB0_108 + j .LBB0_138 +.LBB0_118: # %ddown.exit.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 .Lpcrel_hi31: auipc a0, %pcrel_hi(.L.str.11) addi a0, a0, %pcrel_lo(.Lpcrel_hi31) @@ -1046,9 +1035,9 @@ ld a0, 0(s10) addi a0, a0, 2047 lw a0, 1161(a0) - bnez a0, .LBB0_121 -# %bb.120: # %cond.true.i58.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bnez a0, .LBB0_120 +# %bb.119: # %cond.true.i58.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 .Lpcrel_hi33: auipc a0, %pcrel_hi(.L.str.11) addi a0, a0, %pcrel_lo(.Lpcrel_hi33) @@ -1057,7 +1046,7 @@ addi a2, a1, %pcrel_lo(.Lpcrel_hi34) li a1, 588 call fancy_abort -.LBB0_121: # in Loop: Header=BB0_82 Depth=4 +.LBB0_120: # in Loop: Header=BB0_81 Depth=4 li a2, 0 csrr a0, vlenb slli a0, a0, 2 @@ -1074,69 +1063,69 @@ add a0, sp, a0 addi a0, a0, 368 vl2r.v v28, (a0) # Unknown-size Folded Reload - j .LBB0_108 -.LBB0_122: # in Loop: Header=BB0_82 Depth=4 + j .LBB0_107 +.LBB0_121: # in Loop: Header=BB0_81 Depth=4 add a2, a2, a0 addi a2, a2, -1 - bgez a3, .LBB0_114 -.LBB0_123: # in Loop: Header=BB0_82 Depth=4 + bgez a3, .LBB0_113 +.LBB0_122: # in Loop: Header=BB0_81 Depth=4 add a3, a3, a0 addi a3, a3, -1 divu a1, a2, a0 divu a0, a3, a0 - bne a1, a0, .LBB0_109 -.LBB0_124: # in Loop: Header=BB0_82 Depth=4 + bne a1, a0, .LBB0_108 +.LBB0_123: # in Loop: Header=BB0_81 Depth=4 li a2, 0 - j .LBB0_108 -.LBB0_125: # %compute_miss_rate.exit.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + j .LBB0_107 +.LBB0_124: # %compute_miss_rate.exit.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 li a7, 1000 mul a7, t3, a7 divw a7, a7, t2 li t1, 50 - blt t1, a7, .LBB0_127 -# %bb.126: # %if.then86.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + blt t1, a7, .LBB0_126 +# %bb.125: # %if.then86.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a0, 40(s4) - bltu a2, a0, .LBB0_108 - j .LBB0_109 -.LBB0_127: # %if.end93.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu a2, a0, .LBB0_107 + j .LBB0_108 +.LBB0_126: # %if.end93.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 srli a7, s11, 1 li t1, 8 - bltu t1, a7, .LBB0_129 -# %bb.128: # %if.end93.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu t1, a7, .LBB0_128 +# %bb.127: # %if.end93.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 li a7, 8 -.LBB0_129: # %if.end93.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 +.LBB0_128: # %if.end93.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 li t1, 0 li t2, 0 li t3, 0 sub t0, a0, t0 - j .LBB0_131 -.LBB0_130: # %for.cond2.for.inc22_crit_edge.us.i94.i.i.i.i.i - # in Loop: Header=BB0_131 Depth=5 + j .LBB0_130 +.LBB0_129: # %for.cond2.for.inc22_crit_edge.us.i94.i.i.i.i.i + # in Loop: Header=BB0_130 Depth=5 add t1, t1, a5 slli t4, t1, 32 srli t4, t4, 32 - bgeu t4, a1, .LBB0_138 -.LBB0_131: # %for.cond2.preheader.us.i71.i.i.i.i.i + bgeu t4, a1, .LBB0_137 +.LBB0_130: # %for.cond2.preheader.us.i71.i.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 # => This Loop Header: Depth=5 - # Child Loop BB0_134 Depth 6 - # Child Loop BB0_137 Depth 6 + # Child Loop BB0_133 Depth 6 + # Child Loop BB0_136 Depth 6 sltu t4, a6, a7 or t4, t4, a4 - beqz t4, .LBB0_133 -# %bb.132: # in Loop: Header=BB0_131 Depth=5 + beqz t4, .LBB0_132 +# %bb.131: # in Loop: Header=BB0_130 Depth=5 li t4, 0 - j .LBB0_136 -.LBB0_133: # %vector.ph115 - # in Loop: Header=BB0_131 Depth=5 + j .LBB0_135 +.LBB0_132: # %vector.ph115 + # in Loop: Header=BB0_130 Depth=5 srli t5, s11, 1 neg t4, t5 vsetvli t6, zero, e32, m1, tu, ma @@ -1153,13 +1142,14 @@ vmv.v.x v12, t1 mv t2, t4 vmv2r.v v14, v24 -.LBB0_134: # %vector.body120 +.LBB0_133: # %vector.body120 # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 - # Parent Loop BB0_131 Depth=5 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 + # Parent Loop BB0_130 Depth=5 # => This Inner Loop Header: Depth=6 + vsetvli zero, zero, e32, m2, ta, ma vmv2r.v v16, v14 vmadd.vx v16, a0, v12 vsetvli zero, zero, e64, m4, ta, ma @@ -1173,35 +1163,34 @@ vsetvli zero, zero, e64, m4, ta, ma vsext.vf2 v20, v16 vdivu.vx v20, v20, a1 - vsetvli zero, zero, e32, m2, ta, ma + vsetvli zero, zero, e32, m2, ta, mu vnsrl.wi v16, v20, 0 vmsne.vv v0, v18, v16 vadd.vi v8, v8, 1 - vmerge.vim v16, v28, 1, v0 - vadd.vv v10, v10, v16 + vadd.vi v10, v10, 1, v0.t subw t2, t2, s0 vadd.vx v14, v14, t5 - bnez t2, .LBB0_134 -# %bb.135: # %middle.block112 - # in Loop: Header=BB0_131 Depth=5 + bnez t2, .LBB0_133 +# %bb.134: # %middle.block112 + # in Loop: Header=BB0_130 Depth=5 vmv.s.x v12, zero vredsum.vs v8, v8, v12 vmv.x.s t2, v8 vredsum.vs v8, v10, v12 sext.w t5, t4 vmv.x.s t3, v8 - beq t5, a6, .LBB0_130 -.LBB0_136: # %for.body6.us.i75.i.i.i.i.i.preheader - # in Loop: Header=BB0_131 Depth=5 + beq t5, a6, .LBB0_129 +.LBB0_135: # %for.body6.us.i75.i.i.i.i.i.preheader + # in Loop: Header=BB0_130 Depth=5 addi t5, t4, 1 mul t4, a0, t4 addw t4, t1, t4 -.LBB0_137: # %for.body6.us.i75.i.i.i.i.i +.LBB0_136: # %for.body6.us.i75.i.i.i.i.i # Parent Loop BB0_32 Depth=1 - # Parent Loop BB0_71 Depth=2 - # Parent Loop BB0_73 Depth=3 - # Parent Loop BB0_82 Depth=4 - # Parent Loop BB0_131 Depth=5 + # Parent Loop BB0_70 Depth=2 + # Parent Loop BB0_72 Depth=3 + # Parent Loop BB0_81 Depth=4 + # Parent Loop BB0_130 Depth=5 # => This Inner Loop Header: Depth=6 addw t6, t0, t4 divu s1, t4, a1 @@ -1216,78 +1205,78 @@ srli t6, t6, 32 addi t5, t5, 1 addw t4, t4, a0 - bltu t6, a3, .LBB0_137 - j .LBB0_130 -.LBB0_138: # %for.end24.loopexit22.i98.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + bltu t6, a3, .LBB0_136 + j .LBB0_129 +.LBB0_137: # %for.end24.loopexit22.i98.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 li a0, 1000 mul a0, t3, a0 divw a0, a0, t2 li a1, 50 - blt a1, a0, .LBB0_109 -.LBB0_139: # %if.then98.i.i.i.i.i - # in Loop: Header=BB0_82 Depth=4 + blt a1, a0, .LBB0_108 +.LBB0_138: # %if.then98.i.i.i.i.i + # in Loop: Header=BB0_81 Depth=4 ld a0, 40(s4) addi a2, a2, 1 - bltu a2, a0, .LBB0_108 - j .LBB0_109 -.LBB0_140: # %prune_ref_by_reuse.exit.i.i.i - # in Loop: Header=BB0_73 Depth=3 + bltu a2, a0, .LBB0_107 + j .LBB0_108 +.LBB0_139: # %prune_ref_by_reuse.exit.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a0, 0(s3) - beqz a0, .LBB0_156 -# %bb.141: # %land.lhs.true.i.i.i12 - # in Loop: Header=BB0_73 Depth=3 + beqz a0, .LBB0_155 +# %bb.140: # %land.lhs.true.i.i.i12 + # in Loop: Header=BB0_72 Depth=3 .Lpcrel_hi35: auipc a1, %got_pcrel_hi(dump_flags) ld a1, %pcrel_lo(.Lpcrel_hi35)(a1) lbu a1, 0(a1) andi a1, a1, 8 - beqz a1, .LBB0_156 -# %bb.142: # %if.then.i.i.i - # in Loop: Header=BB0_73 Depth=3 + beqz a1, .LBB0_155 +# %bb.141: # %if.then.i.i.i + # in Loop: Header=BB0_72 Depth=3 .Lpcrel_hi36: auipc a1, %pcrel_hi(.L.str.27) addi a1, a1, %pcrel_lo(.Lpcrel_hi36) mv a2, s4 call fprintf ld a0, 40(s4) - beqz a0, .LBB0_147 -# %bb.143: # %if.then.i.i.i - # in Loop: Header=BB0_73 Depth=3 + beqz a0, .LBB0_146 +# %bb.142: # %if.then.i.i.i + # in Loop: Header=BB0_72 Depth=3 addi s1, s4, 32 li a1, -1 - bne a0, a1, .LBB0_149 -# %bb.144: # %land.lhs.true4.i.i.i - # in Loop: Header=BB0_73 Depth=3 + bne a0, a1, .LBB0_148 +# %bb.143: # %land.lhs.true4.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a0, 32(s4) - beq a0, a1, .LBB0_153 -# %bb.145: # %land.lhs.true4.i.i.i - # in Loop: Header=BB0_73 Depth=3 + beq a0, a1, .LBB0_152 +# %bb.144: # %land.lhs.true4.i.i.i + # in Loop: Header=BB0_72 Depth=3 li a1, 1 - bne a0, a1, .LBB0_151 -# %bb.146: # %if.then6.i.i.i - # in Loop: Header=BB0_73 Depth=3 + bne a0, a1, .LBB0_150 +# %bb.145: # %if.then6.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a3, 0(s3) .Lpcrel_hi37: auipc a0, %pcrel_hi(.L.str.28) addi a0, a0, %pcrel_lo(.Lpcrel_hi37) - j .LBB0_148 -.LBB0_147: # %if.then10.i.i.i - # in Loop: Header=BB0_73 Depth=3 + j .LBB0_147 +.LBB0_146: # %if.then10.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a3, 0(s3) .Lpcrel_hi38: auipc a0, %pcrel_hi(.L.str.29) addi a0, a0, %pcrel_lo(.Lpcrel_hi38) -.LBB0_148: # %if.end34.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_147: # %if.end34.i.i.i + # in Loop: Header=BB0_72 Depth=3 li a1, 16 - j .LBB0_154 -.LBB0_149: # %if.else12.thread.i.i.i - # in Loop: Header=BB0_73 Depth=3 + j .LBB0_153 +.LBB0_148: # %if.else12.thread.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a1, 32(s4) - bgeu a1, a0, .LBB0_153 -# %bb.150: # %if.then21.i.i.i - # in Loop: Header=BB0_73 Depth=3 + bgeu a1, a0, .LBB0_152 +# %bb.149: # %if.then21.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a3, 0(s3) .Lpcrel_hi40: auipc a0, %pcrel_hi(.L.str.31) @@ -1302,12 +1291,12 @@ addi a1, a1, %pcrel_lo(.Lpcrel_hi41) call fprintf ld a0, 32(s4) -.LBB0_151: # %if.end.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_150: # %if.end.i.i.i + # in Loop: Header=BB0_72 Depth=3 li a1, 1 - beq a0, a1, .LBB0_155 -# %bb.152: # %if.then27.i.i.i - # in Loop: Header=BB0_73 Depth=3 + beq a0, a1, .LBB0_154 +# %bb.151: # %if.then27.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a3, 0(s3) .Lpcrel_hi42: auipc a0, %pcrel_hi(.L.str.32) @@ -1321,20 +1310,20 @@ auipc a1, %pcrel_hi(.L.str.21) addi a1, a1, %pcrel_lo(.Lpcrel_hi43) call fprintf - j .LBB0_155 -.LBB0_153: # %if.then16.i.i.i - # in Loop: Header=BB0_73 Depth=3 + j .LBB0_154 +.LBB0_152: # %if.then16.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a3, 0(s3) .Lpcrel_hi39: auipc a0, %pcrel_hi(.L.str.30) addi a0, a0, %pcrel_lo(.Lpcrel_hi39) li a1, 14 -.LBB0_154: # %if.end34.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_153: # %if.end34.i.i.i + # in Loop: Header=BB0_72 Depth=3 li a2, 1 call fwrite -.LBB0_155: # %if.end34.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_154: # %if.end34.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld a1, 0(s3) li a0, 10 call fputc @@ -1353,18 +1342,27 @@ add a0, sp, a0 addi a0, a0, 368 vl2r.v v28, (a0) # Unknown-size Folded Reload -.LBB0_156: # %for.inc.i.i.i - # in Loop: Header=BB0_73 Depth=3 +.LBB0_155: # %for.inc.i.i.i + # in Loop: Header=BB0_72 Depth=3 ld s4, 56(s4) - bnez s4, .LBB0_72 - j .LBB0_70 + bnez s4, .LBB0_71 + j .LBB0_69 +.LBB0_156: # %if.then3.i + # in Loop: Header=BB0_32 Depth=1 +.Lpcrel_hi23: + auipc a0, %pcrel_hi(.L.str.15) + addi a0, a0, %pcrel_lo(.Lpcrel_hi23) + li a1, 22 + li a2, 1 + call fwrite + li s1, 0 + j .LBB0_381 .LBB0_157: # %for.body.i37.i.preheader # in Loop: Header=BB0_32 Depth=1 - li a5, 0 + li a3, 0 + ld a0, 80(sp) # 8-byte Folded Reload ld s5, 72(sp) # 8-byte Folded Reload - mv a0, s5 - li a3, -1 - ld a4, 240(sp) # 8-byte Folded Reload + li s1, -1 j .LBB0_159 .LBB0_158: # %for.inc5.i.i # in Loop: Header=BB0_159 Depth=2 @@ -1386,29 +1384,30 @@ # Parent Loop BB0_159 Depth=2 # => This Inner Loop Header: Depth=3 ld a2, 40(a1) - bne a2, a3, .LBB0_160 + bne a2, s1, .LBB0_160 # %bb.162: # %should_issue_prefetch_p.exit.i.i # in Loop: Header=BB0_161 Depth=3 lbu a2, 64(a1) andi a2, a2, 8 bnez a2, .LBB0_160 # %bb.163: # in Loop: Header=BB0_161 Depth=3 - addiw a5, a5, 1 + addiw a3, a3, 1 j .LBB0_160 .LBB0_164: # %estimate_prefetch_count.exit.i # in Loop: Header=BB0_32 Depth=1 - ld s4, 136(sp) # 8-byte Folded Reload - beqz a5, .LBB0_179 + ld s4, 144(sp) # 8-byte Folded Reload + beqz a3, .LBB0_179 # %bb.165: # %if.end9.i # in Loop: Header=BB0_32 Depth=1 - sd a5, 24(sp) # 8-byte Folded Spill - li s8, -1 - ld a0, 48(a4) + sd a3, 24(sp) # 8-byte Folded Spill + ld a0, 240(sp) # 8-byte Folded Reload + ld a0, 48(a0) sd zero, 336(sp) sd zero, 328(sp) beqz a0, .LBB0_190 .LBB0_166: # %determine_loop_nest_reuse.exit.i # in Loop: Header=BB0_32 Depth=1 + li s2, -1 .Lpcrel_hi51: auipc a0, %got_pcrel_hi(eni_time_weights) ld a1, %pcrel_lo(.Lpcrel_hi51)(a0) @@ -1417,7 +1416,8 @@ call tree_num_loop_insns ld a1, 0(s10) addi a1, a1, 2047 - lw s2, 1065(a1) + lw a1, 1065(a1) + sd a1, 208(sp) # 8-byte Folded Spill mv s9, a0 mv a0, s1 li a1, 0 @@ -1430,7 +1430,7 @@ call tree_num_loop_insns ld a1, 0(s10) lw a1, 840(a1) - mv a5, a0 + mv s8, a0 divuw a0, a1, a0 slti a1, s7, 0 not a1, a1 @@ -1451,9 +1451,8 @@ bltu s1, a0, .LBB0_183 # %bb.169: # %for.body.i83.i.preheader # in Loop: Header=BB0_32 Depth=1 - sd a5, 208(sp) # 8-byte Folded Spill li s5, 1 - ld s3, 72(sp) # 8-byte Folded Reload + ld s3, 80(sp) # 8-byte Folded Reload j .LBB0_171 .LBB0_170: # %for.inc21.i.i # in Loop: Header=BB0_171 Depth=2 @@ -1475,7 +1474,7 @@ # Parent Loop BB0_171 Depth=2 # => This Inner Loop Header: Depth=3 ld a0, 40(s4) - bne a0, s8, .LBB0_172 + bne a0, s2, .LBB0_172 # %bb.174: # %should_issue_prefetch_p.exit.i93.i # in Loop: Header=BB0_173 Depth=3 lbu a0, 64(s4) @@ -1496,14 +1495,16 @@ mv a0, s4 call free li s1, 0 - ld s4, 136(sp) # 8-byte Folded Reload - ld a0, 160(sp) # 8-byte Folded Reload + ld s4, 144(sp) # 8-byte Folded Reload j .LBB0_381 .LBB0_178: # in Loop: Header=BB0_32 Depth=1 - ld s4, 136(sp) # 8-byte Folded Reload + li s1, 0 + ld s4, 144(sp) # 8-byte Folded Reload + ld s5, 72(sp) # 8-byte Folded Reload j .LBB0_380 .LBB0_179: # in Loop: Header=BB0_32 Depth=1 li s1, 0 + ld s3, 80(sp) # 8-byte Folded Reload j .LBB0_377 .LBB0_180: # %for.end23.i.i # in Loop: Header=BB0_32 Depth=1 @@ -1511,9 +1512,8 @@ ld a0, 240(sp) # 8-byte Folded Reload mv a1, s5 call can_unroll_loop_p - ld s3, 152(sp) # 8-byte Folded Reload - ld s4, 136(sp) # 8-byte Folded Reload - ld a5, 208(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload + ld s4, 144(sp) # 8-byte Folded Reload beqz a0, .LBB0_182 # %bb.181: # %should_unroll_loop_p.exit.i.i # in Loop: Header=BB0_32 Depth=1 @@ -1526,6 +1526,7 @@ .LBB0_183: # %determine_unroll_factor.exit.i # in Loop: Header=BB0_32 Depth=1 ld a0, 0(s3) + ld s2, 208(sp) # 8-byte Folded Reload add s2, s9, s2 addi s2, s2, -1 divuw s9, s2, s9 @@ -1547,11 +1548,10 @@ mv a2, s9 mv a3, s5 mv a4, s7 - mv s2, a5 + mv a5, s8 mv a6, s1 ld a7, 24(sp) # 8-byte Folded Reload call fprintf - mv a5, s2 .LBB0_186: # %if.end20.i # in Loop: Header=BB0_32 Depth=1 beqz s1, .LBB0_189 @@ -1560,14 +1560,15 @@ ld a0, 0(s10) addi a0, a0, 2047 lw a1, 1641(a0) - divuw a2, a5, s1 + divuw a2, s8, s1 bge a2, a1, .LBB0_307 .LBB0_188: # in Loop: Header=BB0_32 Depth=1 li s1, 0 .LBB0_189: # in Loop: Header=BB0_32 Depth=1 - ld s8, 216(sp) # 8-byte Folded Reload - ld s9, 80(sp) # 8-byte Folded Reload ld s5, 72(sp) # 8-byte Folded Reload + ld s8, 216(sp) # 8-byte Folded Reload + ld s9, 88(sp) # 8-byte Folded Reload + ld s3, 80(sp) # 8-byte Folded Reload j .LBB0_377 .LBB0_190: # %while.cond.preheader.i.i # in Loop: Header=BB0_32 Depth=1 @@ -1576,6 +1577,7 @@ ld a0, 32(a0) ld a0, 24(a0) ld a1, 240(sp) # 8-byte Folded Reload + ld s5, 136(sp) # 8-byte Folded Reload .LBB0_191: # %while.cond.i.i16 # Parent Loop BB0_32 Depth=1 # => This Inner Loop Header: Depth=2 @@ -1610,25 +1612,23 @@ mv a0, s8 call find_loop_nest ld a0, 328(sp) - li s1, -1 beqz a0, .LBB0_199 # %bb.198: # %cond.true.i.i.i20 # in Loop: Header=BB0_32 Depth=1 lw a0, 0(a0) .LBB0_199: # %VEC_loop_p_base_length.exit.i.i21 # in Loop: Header=BB0_32 Depth=1 - ld s5, 72(sp) # 8-byte Folded Reload sd a0, 208(sp) # 8-byte Folded Spill slli a0, a0, 32 srli a1, a0, 32 - sd a1, 144(sp) # 8-byte Folded Spill + sd a1, 152(sp) # 8-byte Folded Spill srli a0, a0, 30 call xmalloc ld a1, 0(s10) - sd a0, 88(sp) # 8-byte Folded Spill + sd a0, 96(sp) # 8-byte Folded Spill li a5, 0 addi a0, a1, 2047 - mv a1, s5 + ld a1, 80(sp) # 8-byte Folded Reload j .LBB0_201 .LBB0_200: # %for.inc7.i.i.i # in Loop: Header=BB0_201 Depth=2 @@ -1660,7 +1660,7 @@ j .LBB0_202 .LBB0_205: # %while.cond10.outer.i.i.preheader # in Loop: Header=BB0_32 Depth=1 - ld s1, 88(sp) # 8-byte Folded Reload + ld s1, 96(sp) # 8-byte Folded Reload addi s1, s1, -4 ld s2, 208(sp) # 8-byte Folded Reload .LBB0_206: # %while.cond10.outer.i.i @@ -1691,8 +1691,7 @@ # %bb.209: # %if.end19.i.i # in Loop: Header=BB0_206 Depth=2 sd s1, 200(sp) # 8-byte Folded Spill - mv s1, s5 - mv s5, a5 + mv s1, a5 ld a1, 328(sp) add a0, a1, a0 ld s7, 0(a0) @@ -1708,16 +1707,14 @@ srli a0, a0, 32 .LBB0_211: # %if.end33.i.i # in Loop: Header=BB0_206 Depth=2 - mulw a5, s5, a0 - mv s5, s1 + mulw a5, s1, a0 ld s1, 200(sp) # 8-byte Folded Reload j .LBB0_206 .LBB0_212: # %for.body.i58.i.loopexit # in Loop: Header=BB0_32 Depth=1 sd a5, 192(sp) # 8-byte Folded Spill li s7, 0 - mv s1, s5 - ld s5, 128(sp) # 8-byte Folded Reload + ld s1, 80(sp) # 8-byte Folded Reload j .LBB0_214 .LBB0_213: # %for.inc51.i.i # in Loop: Header=BB0_214 Depth=2 @@ -1791,9 +1788,9 @@ li a3, 0 addi a0, s7, 8 sd a0, 48(sp) # 8-byte Folded Spill - ld a2, 144(sp) # 8-byte Folded Reload + ld a2, 152(sp) # 8-byte Folded Reload slli a0, a2, 2 - ld a1, 88(sp) # 8-byte Folded Reload + ld a1, 96(sp) # 8-byte Folded Reload add a0, a1, a0 addi a0, a0, -4 sd a0, 40(sp) # 8-byte Folded Spill @@ -1812,17 +1809,17 @@ # Child Loop BB0_230 Depth 4 # Child Loop BB0_245 Depth 4 # Child Loop BB0_257 Depth 3 - sd s5, 128(sp) # 8-byte Folded Spill - sd a3, 112(sp) # 8-byte Folded Spill + sd s5, 136(sp) # 8-byte Folded Spill + sd a3, 120(sp) # 8-byte Folded Spill slli a0, a3, 3 ld a1, 48(sp) # 8-byte Folded Reload add a0, a1, a0 ld s1, 0(a0) ld s3, 8(s1) li a1, 8 - ld a0, 144(sp) # 8-byte Folded Reload + ld a0, 152(sp) # 8-byte Folded Reload call xcalloc - sd s1, 104(sp) # 8-byte Folded Spill + sd s1, 112(sp) # 8-byte Folded Spill ld a1, 80(s1) sd a0, 200(sp) # 8-byte Folded Spill ld a4, 232(sp) # 8-byte Folded Reload @@ -2027,9 +2024,9 @@ addi a2, a2, -8 ld a3, 32(sp) # 8-byte Folded Reload ld a4, 40(sp) # 8-byte Folded Reload - ld s3, 152(sp) # 8-byte Folded Reload - ld s5, 128(sp) # 8-byte Folded Reload - ld s4, 104(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload + ld s5, 136(sp) # 8-byte Folded Reload + ld s4, 112(sp) # 8-byte Folded Reload j .LBB0_257 .LBB0_256: # %cleanup.i.i.i # in Loop: Header=BB0_257 Depth=3 @@ -2059,9 +2056,9 @@ j .LBB0_261 .LBB0_260: # in Loop: Header=BB0_225 Depth=2 li s1, -1 - ld s3, 152(sp) # 8-byte Folded Reload - ld s5, 128(sp) # 8-byte Folded Reload - ld s4, 104(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload + ld s5, 136(sp) # 8-byte Folded Reload + ld s4, 112(sp) # 8-byte Folded Reload .LBB0_261: # %self_reuse_distance.exit.i.i # in Loop: Header=BB0_225 Depth=2 ld a0, 200(sp) # 8-byte Folded Reload @@ -2074,8 +2071,8 @@ sw s1, 48(a0) .LBB0_263: # %if.end71.i.i # in Loop: Header=BB0_225 Depth=2 - ld s4, 136(sp) # 8-byte Folded Reload - ld a3, 112(sp) # 8-byte Folded Reload + ld s4, 144(sp) # 8-byte Folded Reload + ld a3, 120(sp) # 8-byte Folded Reload beqz s5, .LBB0_224 # %bb.264: # %if.then73.i.i # in Loop: Header=BB0_225 Depth=2 @@ -2099,13 +2096,13 @@ addiw t0, a6, -1 slli a1, t0, 32 srli t1, a1, 32 - sd t1, 128(sp) # 8-byte Folded Spill + sd t1, 136(sp) # 8-byte Folded Spill j .LBB0_268 .LBB0_267: # %for.inc208.i.i # in Loop: Header=BB0_268 Depth=2 ld a0, 336(sp) addiw a7, a7, 1 - ld s3, 152(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload beqz a0, .LBB0_301 .LBB0_268: # %land.lhs.true.i104.i.i # Parent Loop BB0_32 Depth=1 @@ -2181,7 +2178,7 @@ # %bb.279: # %for.body.i120.i.i.preheader # in Loop: Header=BB0_275 Depth=3 mv a2, a1 - ld a3, 144(sp) # 8-byte Folded Reload + ld a3, 152(sp) # 8-byte Folded Reload .LBB0_280: # %for.body.i120.i.i # Parent Loop BB0_32 Depth=1 # Parent Loop BB0_268 Depth=2 @@ -2273,7 +2270,7 @@ call fancy_abort ld t4, 168(sp) # 8-byte Folded Reload ld t3, 176(sp) # 8-byte Folded Reload - ld t1, 128(sp) # 8-byte Folded Reload + ld t1, 136(sp) # 8-byte Folded Reload mv t0, s9 ld a7, 200(sp) # 8-byte Folded Reload lw a2, 0(s5) @@ -2282,7 +2279,7 @@ # in Loop: Header=BB0_275 Depth=3 srli a1, s2, 32 slli a1, a1, 2 - ld a3, 88(sp) # 8-byte Folded Reload + ld a3, 96(sp) # 8-byte Folded Reload add a1, a3, a1 lw a3, 0(a1) add s1, a0, s1 @@ -2316,16 +2313,16 @@ sw s3, 48(s8) j .LBB0_267 .LBB0_300: # in Loop: Header=BB0_268 Depth=2 - ld s4, 136(sp) # 8-byte Folded Reload + ld s4, 144(sp) # 8-byte Folded Reload lw a0, 48(t3) bltu s3, a0, .LBB0_297 j .LBB0_298 -.LBB0_301: # in Loop: Header=BB0_32 Depth=1 - li s8, -1 +.LBB0_301: # %for.end210.i.i + # in Loop: Header=BB0_32 Depth=1 call free_dependence_relations mv a0, s7 call free_data_refs - ld a0, 88(sp) # 8-byte Folded Reload + ld a0, 96(sp) # 8-byte Folded Reload call free ld a3, 0(s3) beqz a3, .LBB0_166 @@ -2345,7 +2342,7 @@ li a1, 17 li a2, 1 call fwrite - ld s1, 72(sp) # 8-byte Folded Reload + ld s1, 80(sp) # 8-byte Folded Reload j .LBB0_305 .LBB0_304: # %for.inc228.i.i # in Loop: Header=BB0_305 Depth=2 @@ -2391,9 +2388,10 @@ ld a1, %pcrel_lo(.Lpcrel_hi55)(a1) lbu a1, 0(a1) andi a1, a1, 8 - ld s8, 216(sp) # 8-byte Folded Reload - ld s9, 80(sp) # 8-byte Folded Reload ld s5, 72(sp) # 8-byte Folded Reload + ld s8, 216(sp) # 8-byte Folded Reload + ld s9, 88(sp) # 8-byte Folded Reload + ld s3, 80(sp) # 8-byte Folded Reload bnez a1, .LBB0_319 # %bb.311: # in Loop: Header=BB0_32 Depth=1 li s1, 0 @@ -2402,12 +2400,14 @@ # in Loop: Header=BB0_32 Depth=1 lw a0, 1609(a0) ld a1, 24(sp) # 8-byte Folded Reload - divuw a1, a5, a1 + divuw a1, s8, a1 blt a1, a0, .LBB0_188 .LBB0_313: # %if.end24.i # in Loop: Header=BB0_32 Depth=1 ld a0, 240(sp) # 8-byte Folded Reload ld a0, 48(a0) + li s2, 14 + lui s8, 6 bnez a0, .LBB0_359 # %bb.314: # %if.end.i.i108.i # in Loop: Header=BB0_32 Depth=1 @@ -2500,7 +2500,7 @@ .LBB0_325: # %for.body.i121.i.preheader # in Loop: Header=BB0_32 Depth=1 li s4, 0 - ld s7, 72(sp) # 8-byte Folded Reload + ld s7, 80(sp) # 8-byte Folded Reload j .LBB0_327 .LBB0_326: # %for.inc8.i.i # in Loop: Header=BB0_327 Depth=2 @@ -2538,8 +2538,7 @@ ld a0, 16(a0) lwu a1, 0(a0) and a1, a1, a2 - li a3, 14 - bne a1, a3, .LBB0_333 + bne a1, s2, .LBB0_333 # %bb.332: # %cond.true.i.i.i.i # in Loop: Header=BB0_328 Depth=3 call vector_type_mode @@ -2558,15 +2557,14 @@ auipc a1, %got_pcrel_hi(optab_table) ld a1, %pcrel_lo(.Lpcrel_hi58)(a1) slli a0, a0, 2 - lui a3, 6 - add a1, a1, a3 + add a1, a1, s8 add a0, a1, a0 lw a0, -352(a0) - ld a1, 120(sp) # 8-byte Folded Reload + ld a1, 128(sp) # 8-byte Folded Reload beq a0, a1, .LBB0_330 # %bb.335: # %if.end.i9.i.i # in Loop: Header=BB0_328 Depth=3 - ld a0, 152(sp) # 8-byte Folded Reload + ld a0, 160(sp) # 8-byte Folded Reload ld a0, 0(a0) beqz a0, .LBB0_338 # %bb.336: # %land.lhs.true.i10.i.i @@ -2601,7 +2599,7 @@ j .LBB0_326 .LBB0_339: # %for.end10.i.i # in Loop: Header=BB0_32 Depth=1 - ld s3, 152(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload beqz s4, .LBB0_359 # %bb.340: # %for.end10.i.i # in Loop: Header=BB0_32 Depth=1 @@ -2617,10 +2615,11 @@ # in Loop: Header=BB0_32 Depth=1 mv s3, a0 lw a0, 0(a0) + ld s1, 208(sp) # 8-byte Folded Reload beqz a0, .LBB0_357 # %bb.343: # %for.body.i17.i.i.preheader # in Loop: Header=BB0_32 Depth=1 - li s1, 0 + li s8, 0 addi s2, s3, 8 j .LBB0_346 .LBB0_344: # in Loop: Header=BB0_346 Depth=2 @@ -2638,17 +2637,16 @@ mv a0, s4 call mark_virtual_ops_for_renaming lwu a0, 0(s3) - addi s1, s1, 1 - bgeu s1, a0, .LBB0_357 + addi s8, s8, 1 + bgeu s8, a0, .LBB0_357 .LBB0_346: # %for.body.i17.i.i # Parent Loop BB0_32 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_354 Depth 3 - slli a0, s1, 3 + slli a0, s8, 3 add a0, s2, a0 ld s7, 0(a0) - ld a0, 208(sp) # 8-byte Folded Reload - ld a0, 0(a0) + ld a0, 0(s1) li a1, 0 call gimple_build_call ld a1, 8(s7) @@ -2711,9 +2709,10 @@ # in Loop: Header=BB0_32 Depth=1 lui a0, 4 call update_ssa - ld s3, 152(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload .LBB0_359: # %mark_nontemporal_stores.exit.i # in Loop: Header=BB0_32 Depth=1 + li s1, -1 ld a0, 0(s10) addi a1, a0, 2047 ld a0, 0(s3) @@ -2743,8 +2742,8 @@ slli a0, s5, 32 srli a0, a0, 32 addi a0, a0, -1 - ld a1, 72(sp) # 8-byte Folded Reload - ld s4, 136(sp) # 8-byte Folded Reload + ld a1, 80(sp) # 8-byte Folded Reload + ld s4, 144(sp) # 8-byte Folded Reload j .LBB0_364 .LBB0_363: # %for.inc24.i.i # in Loop: Header=BB0_364 Depth=2 @@ -2770,7 +2769,7 @@ # Parent Loop BB0_364 Depth=2 # => This Inner Loop Header: Depth=3 ld a4, 40(a3) - bne a4, s8, .LBB0_366 + bne a4, s1, .LBB0_366 # %bb.368: # %should_issue_prefetch_p.exit.i163.i # in Loop: Header=BB0_367 Depth=3 lbu a4, 64(a3) @@ -2800,8 +2799,8 @@ # %bb.373: # %if.end28.split.i # in Loop: Header=BB0_32 Depth=1 li a1, 1 - ld s5, 72(sp) # 8-byte Folded Reload - mv a0, s5 + ld s3, 80(sp) # 8-byte Folded Reload + mv a0, s3 mv a2, s9 call issue_prefetches li s1, 0 @@ -2816,29 +2815,31 @@ mv a0, s1 mv a1, s5 call tree_unroll_loop - ld a0, 72(sp) # 8-byte Folded Reload + ld s3, 80(sp) # 8-byte Folded Reload + mv a0, s3 mv a1, s5 - mv s5, a0 mv a2, s9 call issue_prefetches li s1, 1 .LBB0_375: # %for.body.i173.preheader.i # in Loop: Header=BB0_32 Depth=1 + ld s5, 72(sp) # 8-byte Folded Reload ld s8, 216(sp) # 8-byte Folded Reload - ld s9, 80(sp) # 8-byte Folded Reload + ld s9, 88(sp) # 8-byte Folded Reload j .LBB0_377 .LBB0_376: # %for.end.i.i # in Loop: Header=BB0_377 Depth=2 - mv a0, s5 + mv a0, s7 call free - mv s5, s2 + mv s3, s2 beqz s2, .LBB0_379 .LBB0_377: # %for.body.i173.i # Parent Loop BB0_32 Depth=1 # => This Loop Header: Depth=2 # Child Loop BB0_378 Depth 3 - ld a0, 16(s5) - ld s2, 24(s5) + ld a0, 16(s3) + mv s7, s3 + ld s2, 24(s3) beqz a0, .LBB0_376 .LBB0_378: # %for.body3.i176.i # Parent Loop BB0_32 Depth=1 @@ -2850,18 +2851,15 @@ bnez s3, .LBB0_378 j .LBB0_376 .LBB0_379: # in Loop: Header=BB0_32 Depth=1 - ld s3, 152(sp) # 8-byte Folded Reload - ld s2, 96(sp) # 8-byte Folded Reload + ld s3, 160(sp) # 8-byte Folded Reload + ld s2, 104(sp) # 8-byte Folded Reload .LBB0_380: # %loop_prefetch_arrays.exit # in Loop: Header=BB0_32 Depth=1 - ld a0, 160(sp) # 8-byte Folded Reload - ld s5, 56(sp) # 8-byte Folded Reload - ld s7, 64(sp) # 8-byte Folded Reload + ld s7, 56(sp) # 8-byte Folded Reload .LBB0_381: # %loop_prefetch_arrays.exit # in Loop: Header=BB0_32 Depth=1 ld a3, 0(s3) - or a0, s1, a0 - sd a0, 160(sp) # 8-byte Folded Spill + or s5, s1, s5 beqz a3, .LBB0_384 # %bb.382: # %land.lhs.true61 # in Loop: Header=BB0_32 Depth=1 @@ -2955,8 +2953,7 @@ .LBB0_395: # %if.then.i7.i mv a0, s2 call free - ld a0, 160(sp) # 8-byte Folded Reload - beqz a0, .LBB0_397 + beqz s5, .LBB0_397 # %bb.396: # %if.then68 call scev_reset li s0, 32 --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ifcvt.s 2024-04-01 12:40:59.494446359 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ifcvt.s 2024-04-01 12:41:11.530110709 +0000 @@ -3283,22 +3283,20 @@ neg a1, a1 and a1, a1, a0 vsetvli a5, zero, e32, m1, ta, ma - vmv.v.i v9, 0 - vsetvli zero, zero, e32, m1, tu, ma vmv.v.i v8, 0 + vsetvli zero, zero, e32, m1, tu, ma vmv.s.x v8, s1 mv a5, a1 .LBB10_25: # %vector.body # =>This Inner Loop Header: Depth=1 - vl1re32.v v10, (a4) + vl1re32.v v9, (a4) vsetvli zero, zero, e64, m2, ta, ma - vsext.vf2 v12, v10 - vsll.vi v10, v12, 3 + vsext.vf2 v10, v9 + vsll.vi v10, v10, 3 vluxei64.v v10, (s10), v10 vmseq.vi v0, v10, 0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v10, v9, 1, v0 - vadd.vv v8, v8, v10 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, a3 add a4, a4, a2 bnez a5, .LBB10_25 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/sculpt_paint/paint_curve.s 2024-04-01 12:40:58.974460860 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/sculpt_paint/paint_curve.s 2024-04-01 12:41:10.978126102 +0000 @@ -378,49 +378,43 @@ vid.v v8 vmul.vx v12, v8, a6 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v10, 0 + vmv.v.i v8, 0 li a6, 51 - vmset.m v9 li a7, 52 li t0, 53 mv t1, a3 mv t2, a1 - vmv.v.i v16, 0 .LBB7_6: # %vector.body # =>This Inner Loop Header: Depth=1 vsetvli zero, zero, e64, m4, ta, ma - vadd.vx v20, v12, t2 + vadd.vx v16, v12, t2 vsetvli zero, zero, e8, mf2, ta, ma - vluxei64.v v8, (a6), v20 - vluxei64.v v18, (a7), v20 - vor.vv v8, v8, v18 - vand.vi v19, v8, 1 - vmseq.vi v8, v19, 0 - vmv1r.v v0, v8 - vluxei64.v v24, (t0), v20, v0.t - vmsne.vi v19, v19, 0 - vand.vi v24, v24, 1 - vmseq.vi v25, v24, 0 - vmsne.vi v24, v24, 0 - vmmv.m v26, v19 - vmor.mm v24, v19, v24 - vmand.mm v24, v24, v8 - vmor.mm v0, v24, v26 - vor.vi v18, v18, 2 - vsoxei64.v v18, (a7), v20, v0.t - vmxor.mm v18, v9, v25 - vmand.mm v8, v18, v8 - vmand.mm v18, v9, v19 - vmor.mm v0, v8, v18 + vluxei64.v v10, (a6), v16 + vluxei64.v v11, (a7), v16 + vor.vv v10, v10, v11 + vand.vi v10, v10, 1 + vmseq.vi v0, v10, 0 + vluxei64.v v20, (t0), v16, v0.t + vand.vi v20, v20, 1 + vor.vv v21, v10, v20 + vmsne.vi v10, v10, 0 + vmsne.vi v20, v20, 0 + vmmv.m v22, v10 + vmor.mm v10, v10, v20 + vmand.mm v10, v10, v0 + vmor.mm v0, v10, v22 + vor.vi v10, v11, 2 + vsoxei64.v v10, (a7), v16, v0.t + vmseq.vi v0, v21, 0 vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v18, v10, 1, v0 - vadd.vv v16, v16, v18 + vadd.vi v10, v8, 1 + vmerge.vvm v8, v10, v8, v0 sub t1, t1, a4 add t2, t2, a5 bnez t1, .LBB7_6 # %bb.7: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v16, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a4, v8 bne a3, a2, .LBB7_11 .LBB7_8: # %for.end --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/physics/particle_edit.s 2024-04-01 12:40:58.966461083 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/physics/particle_edit.s 2024-04-01 12:41:10.970126325 +0000 @@ -15107,9 +15107,8 @@ vsetvli a4, zero, e64, m2, ta, ma vid.v v10 vsetvli zero, zero, e32, m1, ta, ma - vmv.v.i v9, 0 - vsetvli zero, zero, e32, m1, tu, ma vmv.v.i v8, 0 + vsetvli zero, zero, e32, m1, tu, ma vmv.s.x v8, t1 slli a4, s8, 2 vsetvli zero, zero, e64, m2, ta, ma @@ -15121,24 +15120,24 @@ # =>This Inner Loop Header: Depth=1 addi t0, s7, 12 vsetvli zero, zero, e16, mf2, ta, ma - vluxei64.v v14, (t0), v12 - vand.vi v14, v14, 9 - vmseq.vi v0, v14, 1 + vluxei64.v v9, (t0), v12 + vand.vi v9, v9, 9 + vmseq.vi v0, v9, 1 vlse64.v v14, (s5), zero, v0.t vsetvli zero, zero, e64, m2, ta, ma vmacc.vx v14, a5, v10 vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v16, (a6), v14, v0.t - vadd.vv v14, v16, v16 + vluxei64.v v9, (a6), v14, v0.t + vadd.vv v9, v9, v9 vsetvli zero, zero, e64, m2, ta, ma - vsext.vf2 v16, v14 - vsll.vi v14, v16, 2 + vsext.vf2 v14, v9 + vsll.vi v14, v14, 2 vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v16, (s0), v14, v0.t - vmsne.vi v14, v16, -1 - vmand.mm v0, v0, v14 - vmerge.vim v14, v9, 1, v0 - vadd.vv v8, v8, v14 + vluxei64.v v9, (s0), v14, v0.t + vmsne.vi v9, v9, -1 + vmand.mm v0, v0, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t vsetvli zero, zero, e64, m2, ta, ma vadd.vx v10, v10, a2 sub a7, a7, a2 @@ -16084,9 +16083,8 @@ vluxei64.v v9, (s2), v12 vand.vi v9, v9, 5 vmseq.vi v0, v9, 1 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v18, v10, 1, v0 - vadd.vv v16, v16, v18 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v16, v16, 1, v0.t sub t3, t3, a4 add s1, s1, t2 bnez t3, .LBB85_28 --- build.a/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/pcfrac.s 2024-04-01 12:41:02.850352768 +0000 +++ build.b/MultiSource/Benchmarks/MallocBench/cfrac/CMakeFiles/cfrac.dir/pcfrac.s 2024-04-01 12:41:14.962015002 +0000 @@ -161,25 +161,23 @@ addi a6, s5, 1 neg a4, a5 and a4, a2, a4 - vsetvli a3, zero, e32, m2, ta, ma - vmv.v.i v8, 0 addi a3, a4, 1 + vsetvli a7, zero, e32, m2, ta, ma + vmv.v.i v8, 0 mv a7, a4 - vmv.v.i v10, 0 .LBB3_7: # %vector.body # =>This Inner Loop Header: Depth=1 - vle8.v v12, (a6) vsetvli zero, zero, e8, mf2, ta, ma - vmsne.vi v0, v12, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vle8.v v10, (a6) + vmsne.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a7, a7, a5 add a6, a6, a5 bnez a7, .LBB3_7 # %bb.8: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a5, v8 beq a2, a4, .LBB3_11 .LBB3_9: # %for.body.preheader34 --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-dump.s 2024-04-01 12:40:59.650442008 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-dump.s 2024-04-01 12:41:11.694106136 +0000 @@ -5338,18 +5338,17 @@ bnez t1, .LBB20_24 # %bb.33: # %vector.ph srli a7, t0, 1 - vsetvli a5, zero, e64, m4, ta, ma + neg a5, a7 + vsetvli t1, zero, e64, m4, ta, ma vid.v v8 - vsetvli a5, zero, e32, m1, ta, ma - vmv.v.i v16, 0 + vsetvli t1, zero, e32, m1, ta, ma + vmv.v.i v14, 0 vsetvli zero, zero, e32, m1, tu, ma - vmv.s.x v16, a1 + vmv.s.x v14, a1 vsetvli a1, zero, e32, m2, ta, ma - vmv.v.i v14, 0 - neg a5, a7 vmv.v.i v12, 0 and a5, a3, a5 - vmv1r.v v12, v16 + vmv1r.v v12, v14 li a1, 20 mul a1, t0, a1 vsetvli zero, zero, e64, m4, ta, ma @@ -5357,23 +5356,22 @@ li t0, 40 li t1, 24 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v20, -1 + vmv.v.i v14, -1 li t2, 28 mv t3, a5 .LBB20_34: # %vector.body # =>This Inner Loop Header: Depth=1 vsetvli zero, zero, e64, m4, ta, ma - vmv4r.v v24, v8 - vmadd.vx v24, t0, v16 - vsetvli zero, zero, e32, m2, ta, ma - vluxei64.v v22, (t1), v24 - vand.vx v28, v22, a2 - vmsne.vi v0, v28, 0 - vsoxei64.v v20, (t2), v24, v0.t - vor.vx v22, v22, a0 - vsse32.v v22, (a6), t0, v0.t - vmerge.vim v22, v14, 1, v0 - vadd.vv v12, v12, v22 + vmv4r.v v20, v8 + vmadd.vx v20, t0, v16 + vsetvli zero, zero, e32, m2, ta, mu + vluxei64.v v24, (t1), v20 + vand.vx v26, v24, a2 + vmsne.vi v0, v26, 0 + vsoxei64.v v14, (t2), v20, v0.t + vor.vx v20, v24, a0 + vsse32.v v20, (a6), t0, v0.t + vadd.vi v12, v12, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma vadd.vx v8, v8, a7 sub t3, t3, a7 --- build.a/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/takehiro.s 2024-04-01 12:41:02.918350871 +0000 +++ build.b/MultiSource/Benchmarks/MiBench/consumer-lame/CMakeFiles/consumer-lame.dir/takehiro.s 2024-04-01 12:41:15.026013218 +0000 @@ -2296,63 +2296,60 @@ slli a4, t6, 3 add a4, a0, a4 vsetivli zero, 8, e32, m2, ta, ma - vmv.v.x v10, a6 - vmv.v.i v12, 0 + vmv.v.x v12, a6 + vmv.v.i v10, 0 vmv.v.x v14, a7 li t2, 240 mv t3, t6 vmv.v.i v18, 0 vmv.v.i v16, 0 - vmv.v.i v20, 0 .LBB3_67: # %vector.body128 # =>This Inner Loop Header: Depth=1 vsetvli zero, zero, e32, m2, ta, mu - vlseg2e32.v v22, (a0) - vmsne.vi v8, v22, 0 - vmsgt.vi v9, v22, 14 - vsll.vi v22, v22, 4 + vlseg2e32.v v20, (a0) + vmsne.vi v8, v20, 0 + vmsgt.vi v9, v20, 14 + vsll.vi v20, v20, 4 vmv1r.v v0, v9 - vmerge.vxm v22, v22, t2, v0 + vmerge.vxm v20, v20, t2, v0 vmv1r.v v0, v8 - vmerge.vim v26, v12, 1, v0 - vadd.vv v20, v20, v26 + vadd.vi v16, v16, 1, v0.t vmv1r.v v0, v9 - vadd.vv v16, v16, v10, v0.t - vadd.vv v18, v18, v14, v0.t - vmsgt.vi v8, v24, 14 - vmsne.vi v9, v24, 0 + vadd.vv v18, v18, v12, v0.t + vadd.vv v10, v10, v14, v0.t + vmsgt.vi v8, v22, 14 + vmsne.vi v9, v22, 0 vmv1r.v v0, v8 - vmerge.vim v24, v24, 15, v0 - vadd.vv v22, v22, v24 + vmerge.vim v22, v22, 15, v0 + vadd.vv v20, v20, v22 vmv1r.v v0, v9 - vmerge.vim v24, v12, 1, v0 - vadd.vv v20, v20, v24 + vadd.vi v16, v16, 1, v0.t vmv1r.v v0, v8 - vadd.vv v16, v16, v10, v0.t - vadd.vv v18, v18, v14, v0.t + vadd.vv v18, v18, v12, v0.t + vadd.vv v10, v10, v14, v0.t vsetvli zero, zero, e64, m4, ta, ma - vsext.vf2 v24, v22 + vsext.vf2 v24, v20 vsetvli zero, zero, e8, mf2, ta, ma vluxei64.v v8, (t0), v24 vsetvli zero, zero, e16, m1, ta, ma vzext.vf2 v9, v8 - vwaddu.wv v16, v16, v9 + vwaddu.wv v18, v18, v9 vsetvli zero, zero, e8, mf2, ta, ma vluxei64.v v8, (t1), v24 vsetvli zero, zero, e16, m1, ta, ma vzext.vf2 v9, v8 - vwaddu.wv v18, v18, v9 + vwaddu.wv v10, v10, v9 addi t3, t3, -8 addi a0, a0, 64 bnez t3, .LBB3_67 # %bb.68: # %middle.block119 vsetvli zero, zero, e32, m2, ta, ma vmv.s.x v8, zero - vredsum.vs v9, v18, v8 + vredsum.vs v9, v10, v8 vmv.x.s t3, v9 - vredsum.vs v9, v16, v8 + vredsum.vs v9, v18, v8 vmv.x.s t2, v9 - vredsum.vs v8, v20, v8 + vredsum.vs v8, v16, v8 vmv.x.s t4, v8 bne t5, t6, .LBB3_77 .LBB3_69: # %count_bit_ESC.exit --- build.a/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/lac/sparse_matrix.s 2024-04-01 12:40:58.350478262 +0000 +++ build.b/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/lac/sparse_matrix.s 2024-04-01 12:41:10.354143503 +0000 @@ -1176,27 +1176,25 @@ .LBB24_4: # %vector.ph neg a3, a0 and a3, a2, a3 + slli a4, a4, 1 vsetvli a5, zero, e32, m1, ta, ma vmv.v.i v8, 0 - slli a4, a4, 1 mv a5, a3 mv a6, a1 - vmv.v.i v9, 0 .LBB24_5: # %vector.body # =>This Inner Loop Header: Depth=1 vl2re64.v v10, (a6) vsetvli zero, zero, e64, m2, ta, ma vfabs.v v10, v10 vmfgt.vf v0, v10, fa0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v10, v8, 1, v0 - vadd.vv v9, v9, v10 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, a0 add a6, a6, a4 bnez a5, .LBB24_5 # %bb.6: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v9, v8 + vmv.s.x v9, zero + vredsum.vs v8, v8, v9 vmv.x.s a0, v8 beq a2, a3, .LBB24_9 .LBB24_7: # %for.body.preheader @@ -5896,28 +5894,27 @@ .LBB90_4: # %vector.ph neg a3, a0 and a3, a2, a3 + slli a4, a4, 1 vsetvli a5, zero, e32, m2, ta, ma vmv.v.i v8, 0 - slli a4, a4, 1 mv a5, a3 mv a6, a1 - vmv.v.i v10, 0 .LBB90_5: # %vector.body # =>This Inner Loop Header: Depth=1 - vl2re32.v v12, (a6) - vfabs.v v12, v12 - vfwcvt.f.f.v v16, v12 - vsetvli zero, zero, e64, m4, ta, ma - vmfgt.vf v0, v16, fa0 + vl2re32.v v10, (a6) vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vfabs.v v10, v10 + vfwcvt.f.f.v v12, v10 + vsetvli zero, zero, e64, m4, ta, ma + vmfgt.vf v0, v12, fa0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, a0 add a6, a6, a4 bnez a5, .LBB90_5 # %bb.6: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a0, v8 beq a2, a3, .LBB90_9 .LBB90_7: # %for.body.preheader @@ -10656,31 +10653,28 @@ sd a0, 16(sp) # 8-byte Folded Spill sub s10, s9, a0 vsetivli zero, 2, e32, mf2, ta, ma - vmv.v.i v8, 0 + vmv.v.i v9, 0 li a0, -1 srli s11, a0, 1 vsetvli zero, zero, e8, mf8, ta, ma - vmv.v.i v9, 0 + vmv.v.i v8, 0 csrr a0, vlenb - slli a1, a0, 1 - add a0, a1, a0 + slli a0, a0, 2 add a0, sp, a0 addi a0, a0, 48 - vs1r.v v9, (a0) # Unknown-size Folded Spill + vs1r.v v8, (a0) # Unknown-size Folded Spill sd s10, 24(sp) # 8-byte Folded Spill sd s8, 32(sp) # 8-byte Folded Spill - vmv1r.v v9, v8 - addi a0, sp, 48 - vs1r.v v8, (a0) # Unknown-size Folded Spill + vmv1r.v v8, v9 .LBB156_5: # %vector.body # =>This Inner Loop Header: Depth=1 csrr a0, vlenb - slli a0, a0, 2 + slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 48 vs1r.v v8, (a0) # Unknown-size Folded Spill csrr a0, vlenb - slli a1, a0, 2 + slli a1, a0, 1 add a0, a1, a0 add a0, sp, a0 addi a0, a0, 48 @@ -10706,13 +10700,10 @@ vand.vi v8, v8, 1 vmsne.vi v0, v8, 0 vmv.s.x v8, zero - csrr a0, vlenb - add a0, sp, a0 - addi a0, a0, 48 + addi a0, sp, 48 vs1r.v v8, (a0) # Unknown-size Folded Spill vmerge.vim v8, v8, 1, v0 csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 48 vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -10728,21 +10719,18 @@ vmsne.vi v0, v8, 0 vsetivli zero, 2, e8, mf8, ta, ma csrr a0, vlenb - slli a1, a0, 1 - add a0, a1, a0 + slli a0, a0, 2 add a0, sp, a0 addi a0, a0, 48 vl1r.v v8, (a0) # Unknown-size Folded Reload vmerge.vim v8, v8, 1, v0 csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 48 vl1r.v v9, (a0) # Unknown-size Folded Reload vslideup.vi v8, v9, 1 vmsne.vi v8, v8, 0 csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 48 vs1r.v v8, (a0) # Unknown-size Folded Spill @@ -10756,20 +10744,21 @@ vmv.s.x v8, a0 vand.vi v8, v8, 1 vmsne.vi v0, v8, 0 - csrr a0, vlenb - add a0, sp, a0 - addi a0, a0, 48 + addi a0, sp, 48 vl1r.v v8, (a0) # Unknown-size Folded Reload vmerge.vim v8, v8, 1, v0 - csrr a0, vlenb - add a0, sp, a0 - addi a0, a0, 48 vs1r.v v8, (a0) # Unknown-size Folded Spill mv a0, s2 mv a1, s3 mv a2, s0 mv a3, s1 call __gttf2 + csrr a1, vlenb + slli a2, a1, 1 + add a1, a2, a1 + add a1, sp, a1 + addi a1, a1, 48 + vl1r.v v9, (a1) # Unknown-size Folded Reload sgtz a0, a0 vsetivli zero, 1, e8, mf8, ta, ma vmv.s.x v8, a0 @@ -10777,64 +10766,28 @@ vmsne.vi v0, v8, 0 vsetivli zero, 2, e8, mf8, ta, ma csrr a0, vlenb - slli a1, a0, 1 - add a0, a1, a0 + slli a0, a0, 2 add a0, sp, a0 addi a0, a0, 48 vl1r.v v8, (a0) # Unknown-size Folded Reload vmerge.vim v8, v8, 1, v0 - csrr a0, vlenb - add a0, sp, a0 - addi a0, a0, 48 - vl1r.v v9, (a0) # Unknown-size Folded Reload - vslideup.vi v8, v9, 1 + addi a0, sp, 48 + vl1r.v v10, (a0) # Unknown-size Folded Reload + vslideup.vi v8, v10, 1 vmsne.vi v8, v8, 0 - vsetvli zero, zero, e32, mf2, ta, ma + vsetvli zero, zero, e32, mf2, ta, mu csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 48 vl1r.v v0, (a0) # Unknown-size Folded Reload - addi a0, sp, 48 - vl1r.v v10, (a0) # Unknown-size Folded Reload - vmerge.vim v9, v10, 1, v0 + vadd.vi v9, v9, 1, v0.t vmv1r.v v0, v8 - vmerge.vim v8, v10, 1, v0 - csrr a0, vlenb - slli a1, a0, 2 - add a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 48 - vl1r.v v10, (a0) # Unknown-size Folded Reload - vadd.vv v10, v10, v9 csrr a0, vlenb - slli a1, a0, 2 - add a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 48 - vs1r.v v10, (a0) # Unknown-size Folded Spill - csrr a0, vlenb - slli a1, a0, 2 - add a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 48 - vl1r.v v9, (a0) # Unknown-size Folded Reload - csrr a0, vlenb - slli a0, a0, 2 - add a0, sp, a0 - addi a0, a0, 48 - vl1r.v v10, (a0) # Unknown-size Folded Reload - vadd.vv v10, v10, v8 - csrr a0, vlenb - slli a0, a0, 2 - add a0, sp, a0 - addi a0, a0, 48 - vs1r.v v10, (a0) # Unknown-size Folded Spill - csrr a0, vlenb - slli a0, a0, 2 + slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 48 vl1r.v v8, (a0) # Unknown-size Folded Reload + vadd.vi v8, v8, 1, v0.t addi s10, s10, -4 addi s8, s8, 64 bnez s10, .LBB156_5 --- build.a/External/SPEC/CINT2017rate/541.leela_r/CMakeFiles/541.leela_r.dir/root/cpu2017/benchspec/CPU/541.leela_r/src/FastBoard.s 2024-04-01 12:41:00.130428622 +0000 +++ build.b/External/SPEC/CINT2017rate/541.leela_r/CMakeFiles/541.leela_r.dir/root/cpu2017/benchspec/CPU/541.leela_r/src/FastBoard.s 2024-04-01 12:41:12.178092638 +0000 @@ -2177,22 +2177,21 @@ .LBB20_3: # %vector.ph srli a7, a5, 3 slli t0, a7, 2 + slli a7, a7, 31 vsetvli t1, zero, e32, m1, ta, ma vmv.v.i v8, 0 vsetvli zero, zero, e32, m1, tu, ma vmv.v.i v9, 0 vmv.s.x v9, a4 vsetvli a4, zero, e32, m2, ta, ma - vmv.v.i v12, 0 - slli a7, a7, 31 vmv.v.i v10, 0 sub a4, a7, t0 - vmv1r.v v10, v9 + vmv.v.i v12, 0 vsetvli zero, zero, e32, m2, tu, ma vmv.s.x v8, a3 - vmv.v.i v14, 0 and a7, a4, a2 - vmv1r.v v14, v8 + vmv1r.v v12, v9 + vmv1r.v v10, v8 li a3, 112 li a4, 64 mv t0, a7 @@ -2201,28 +2200,26 @@ # =>This Inner Loop Header: Depth=1 vl1re16.v v8, (t1) vsetvli zero, zero, e16, m1, ta, ma - vwaddu.vv v16, v8, v8 - vluxei32.v v9, (a1), v16 - vand.vx v16, v9, a3 - vmseq.vx v8, v16, a4 - vmsne.vx v16, v16, a4 + vwaddu.vv v14, v8, v8 + vluxei32.v v9, (a1), v14 + vand.vx v8, v9, a3 + vmsne.vx v8, v8, a4 vand.vi v9, v9, 7 vmseq.vi v9, v9, 4 - vmand.mm v0, v16, v9 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v16, v12, 1, v0 - vadd.vv v14, v14, v16 + vmand.mm v0, v8, v9 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v10, v10, 1, v0.t + vadd.vi v14, v12, 1 vmv1r.v v0, v8 - vmerge.vim v8, v12, 1, v0 - vadd.vv v10, v10, v8 + vmerge.vvm v12, v14, v12, v0 sub t0, t0, a6 add t1, t1, a5 bnez t0, .LBB20_4 # %bb.5: # %middle.block vmv.s.x v8, zero - vredsum.vs v9, v10, v8 + vredsum.vs v9, v12, v8 vmv.x.s a4, v9 - vredsum.vs v8, v14, v8 + vredsum.vs v8, v10, v8 vmv.x.s a3, v8 beq a7, a2, .LBB20_10 .LBB20_6: # %for.body.preheader --- build.a/External/SPEC/CFP2017speed/644.nab_s/CMakeFiles/644.nab_s.dir/root/cpu2017/benchspec/CPU/544.nab_r/src/regex-alpha/regcomp.s 2024-04-01 12:40:59.290452048 +0000 +++ build.b/External/SPEC/CFP2017speed/644.nab_s/CMakeFiles/644.nab_s.dir/root/cpu2017/benchspec/CPU/544.nab_r/src/regex-alpha/regcomp.s 2024-04-01 12:41:11.314116733 +0000 @@ -5231,22 +5231,20 @@ li a5, 0 srli a6, a6, 1 negw a1, a6 - vsetvli a7, zero, e32, m2, ta, ma - vmv.v.i v10, 0 and a1, a2, a1 + vsetvli a7, zero, e32, m2, ta, ma vmv.v.i v8, 0 .LBB5_229: # %vector.body356 # =>This Inner Loop Header: Depth=1 andi a7, a5, 252 add a7, a3, a7 - vle8.v v12, (a7) vsetvli zero, zero, e8, mf2, ta, ma - vand.vx v12, v12, a4 - vmsne.vi v0, v12, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 + vle8.v v10, (a7) + vand.vx v10, v10, a4 + vmsne.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu addw a5, a5, a6 - vadd.vv v8, v8, v12 + vadd.vi v8, v8, 1, v0.t bne a1, a5, .LBB5_229 # %bb.230: # %middle.block348 vmv.s.x v10, zero --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ipa-pure-const.s 2024-04-01 12:40:59.566444349 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ipa-pure-const.s 2024-04-01 12:41:11.606108590 +0000 @@ -1042,39 +1042,37 @@ li t2, 32 li t3, 296 vsetvli zero, zero, e64, m2, ta, ma - vmv.v.i v12, 0 - vmv1r.v v10, v9 + vmv.v.i v10, 0 .LBB3_8: # %vector.body # =>This Inner Loop Header: Depth=1 and t4, a6, t0 slli t4, t4, 3 add t4, a7, t4 - vl2re64.v v14, (t4) + vl2re64.v v12, (t4) vsetvli zero, zero, e16, mf2, ta, ma - vluxei64.v v8, (t1), v14 + vluxei64.v v8, (t1), v12 vand.vx v8, v8, t2 vmsne.vi v8, v8, 0 vmv1r.v v0, v8 - vlse32.v v11, (a0), zero, v0.t + vlse32.v v14, (a0), zero, v0.t vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v16, (t3), v14, v0.t - vmsltu.vv v11, v16, v11 - vmand.mm v0, v8, v11 + vluxei64.v v15, (t3), v12, v0.t + vmsltu.vv v12, v15, v14 + vmand.mm v0, v8, v12 vsetvli zero, zero, e64, m2, ta, mu - vzext.vf2 v14, v16 - vsll.vi v14, v14, 3 - vmv2r.v v16, v12 - vluxei64.v v16, (a1), v14, v0.t - vmsne.vi v11, v16, 0 - vmand.mm v0, v8, v11 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v8, v9, 1, v0 + vzext.vf2 v12, v15 + vsll.vi v12, v12, 3 + vmv2r.v v14, v10 + vluxei64.v v14, (a1), v12, v0.t + vmsne.vi v12, v14, 0 + vmand.mm v0, v8, v12 + vsetvli zero, zero, e32, m1, ta, mu add a6, a6, a5 - vadd.vv v10, v10, v8 + vadd.vi v9, v9, 1, v0.t bne a3, a6, .LBB3_8 # %bb.9: # %middle.block vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vredsum.vs v8, v9, v8 vmv.x.s a5, v8 bne a3, a4, .LBB3_20 .LBB3_10: # %for.end.loopexit147 --- build.a/External/SPEC/CFP2017rate/511.povray_r/CMakeFiles/511.povray_r.dir/root/cpu2017/benchspec/CPU/511.povray_r/src/polysolv.s 2024-04-01 12:40:58.594471457 +0000 +++ build.b/External/SPEC/CFP2017rate/511.povray_r/CMakeFiles/511.povray_r.dir/root/cpu2017/benchspec/CPU/511.povray_r/src/polysolv.s 2024-04-01 12:41:10.582137146 +0000 @@ -1141,41 +1141,39 @@ addi t2, sp, 2047 addi t2, t2, 145 vl2r.v v10, (t2) # Unknown-size Folded Reload - vmul.vx v10, v10, a3 + vmul.vx v12, v10, a3 vsetvli zero, zero, e32, m1, ta, ma - vmv.v.i v12, 0 + vmv.v.i v10, 0 li t2, 8 addi t3, s4, -1 mv t4, a7 mv t5, s3 - vmv.v.i v13, 0 .LBB1_59: # %vector.body270 # =>This Inner Loop Header: Depth=1 vsetvli zero, zero, e64, m2, ta, ma - vlse32.v v14, (t5), a3 - vmv2r.v v16, v8 - vadd.vx v8, v10, t5 - vsext.vf2 v18, v14 - vsll.vi v14, v18, 3 - vadd.vv v8, v8, v14 + vlse32.v v11, (t5), a3 + vmv2r.v v14, v8 + vadd.vx v8, v12, t5 + vsext.vf2 v16, v11 + vsll.vi v16, v16, 3 + vadd.vv v8, v8, v16 vluxei64.v v8, (t2), v8 vsetivli zero, 1, e64, m2, ta, ma - vslidedown.vx v14, v16, t3 + vslidedown.vx v14, v14, t3 vsetvli t6, zero, e64, m2, ta, ma vslideup.vi v14, v8, 1 - vmfeq.vf v16, v14, fs0 + vmfeq.vf v11, v14, fs0 vfmul.vv v14, v14, v8 - vmflt.vf v17, v14, fs0 - vmor.mm v0, v16, v17 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v14, v12, 1, v0 - vadd.vv v13, v13, v14 + vmflt.vf v16, v14, fs0 + vmor.mm v0, v11, v16 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v10, v10, 1, v0.t sub t4, t4, s4 add t5, t5, t1 bnez t4, .LBB1_59 # %bb.60: # %middle.block261 - vmv.s.x v10, zero - vredsum.vs v10, v13, v10 + vmv.s.x v11, zero + vredsum.vs v10, v10, v11 vmv.x.s a3, v10 beq a6, a7, .LBB1_63 # %bb.61: @@ -1244,36 +1242,34 @@ addi t2, sp, 2047 addi t2, t2, 145 vl2r.v v10, (t2) # Unknown-size Folded Reload - vmul.vx v10, v10, t1 + vmul.vx v12, v10, t1 vsetvli zero, zero, e32, m1, ta, ma - vmv.v.i v12, 0 + vmv.v.i v10, 0 addi t1, s4, -1 mv t2, a6 mv t3, s3 - vmv.v.i v13, 0 .LBB1_68: # %vector.body288 # =>This Inner Loop Header: Depth=1 vmv2r.v v14, v8 addi t4, t3, 8 vsetvli zero, zero, e64, m2, ta, ma - vluxei64.v v8, (t4), v10 + vluxei64.v v8, (t4), v12 vsetivli zero, 1, e64, m2, ta, ma vslidedown.vx v14, v14, t1 vsetvli t4, zero, e64, m2, ta, ma vslideup.vi v14, v8, 1 - vmfeq.vf v16, v14, fs0 + vmfeq.vf v11, v14, fs0 vfmul.vv v14, v14, v8 - vmflt.vf v17, v14, fs0 - vmor.mm v0, v16, v17 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v14, v12, 1, v0 - vadd.vv v13, v13, v14 + vmflt.vf v16, v14, fs0 + vmor.mm v0, v11, v16 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v10, v10, 1, v0.t sub t2, t2, s4 add t3, t3, t0 bnez t2, .LBB1_68 # %bb.69: # %middle.block279 - vmv.s.x v10, zero - vredsum.vs v10, v13, v10 + vmv.s.x v11, zero + vredsum.vs v10, v10, v11 vmv.x.s t0, v10 beq a5, a6, .LBB1_72 # %bb.70: --- build.a/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/analyze.s 2024-04-01 12:41:03.010348306 +0000 +++ build.b/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/analyze.s 2024-04-01 12:41:15.098011210 +0000 @@ -179,43 +179,41 @@ srli a0, a0, 32 addi a1, a0, -1 csrr a6, vlenb - srli a3, a6, 2 - bgeu a1, a3, .LBB0_22 + srli a4, a6, 2 + bgeu a1, a4, .LBB0_22 # %bb.21: - li a3, 0 + li a4, 0 li a5, 0 li a2, 1 j .LBB0_25 .LBB0_22: # %vector.ph - neg a4, a3 - and a4, a1, a4 - addi a2, a4, 1 + neg a3, a4 + and a3, a1, a3 + addi a2, a3, 1 addi a5, s0, 8 vsetvli a7, zero, e32, m1, ta, ma - vmv.v.i v9, 0 - slli a6, a6, 1 - mv a7, a4 - vmv.v.i v10, 0 vmv.v.i v8, 0 + slli a6, a6, 1 + mv a7, a3 + vmv.v.i v9, 0 .LBB0_23: # %vector.body # =>This Inner Loop Header: Depth=1 - vl2re64.v v12, (a5) + vl2re64.v v10, (a5) vsetvli zero, zero, e32, m1, ta, mu - vluxei64.v v11, (zero), v12 - vmsgt.vi v0, v11, 1 - vmerge.vim v12, v9, 1, v0 - vadd.vv v8, v8, v12 - vadd.vv v10, v10, v11, v0.t - sub a7, a7, a3 + vluxei64.v v12, (zero), v10 + vmsgt.vi v0, v12, 1 + vadd.vi v9, v9, 1, v0.t + vadd.vv v8, v8, v12, v0.t + sub a7, a7, a4 add a5, a5, a6 bnez a7, .LBB0_23 # %bb.24: # %middle.block - vmv.s.x v9, zero - vredsum.vs v10, v10, v9 - vmv.x.s a3, v10 - vredsum.vs v8, v8, v9 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 + vmv.x.s a4, v8 + vredsum.vs v8, v9, v10 vmv.x.s a5, v8 - beq a1, a4, .LBB0_27 + beq a1, a3, .LBB0_27 .LBB0_25: # %for.body77.preheader194 slli a1, a2, 3 add a1, s0, a1 @@ -225,16 +223,16 @@ # =>This Inner Loop Header: Depth=1 ld a2, 0(a1) lw a2, 0(a2) - slti a4, a2, 2 - xori a6, a4, 1 + slti a3, a2, 2 + xori a6, a3, 1 add a5, a5, a6 - addi a4, a4, -1 - and a2, a4, a2 + addi a3, a3, -1 + and a2, a3, a2 addi a1, a1, 8 - add a3, a2, a3 + add a4, a2, a4 bne a1, a0, .LBB0_26 .LBB0_27: # %for.end92.loopexit - fcvt.d.w fs1, a3 + fcvt.d.w fs1, a4 fcvt.d.w fs0, a5 .LBB0_28: # %for.end92 .Lpcrel_hi4: @@ -405,10 +403,10 @@ srli a0, a0, 32 addi a2, a0, -1 csrr a7, vlenb - srli a4, a7, 2 - bgeu a2, a4, .LBB0_55 + srli a5, a7, 2 + bgeu a2, a5, .LBB0_55 # %bb.53: - li a4, 0 + li a5, 0 li a6, 0 li a3, 1 j .LBB0_58 @@ -417,36 +415,34 @@ fmv.d fa4, fa5 j .LBB0_61 .LBB0_55: # %vector.ph172 - neg a5, a4 - and a5, a2, a5 - addi a3, a5, 1 + neg a4, a5 + and a4, a2, a4 + addi a3, a4, 1 addi a6, s0, 8 vsetvli t0, zero, e32, m1, ta, ma - vmv.v.i v9, 0 - slli a7, a7, 1 - mv t0, a5 - vmv.v.i v10, 0 vmv.v.i v8, 0 + slli a7, a7, 1 + mv t0, a4 + vmv.v.i v9, 0 .LBB0_56: # %vector.body178 # =>This Inner Loop Header: Depth=1 - vl2re64.v v12, (a6) + vl2re64.v v10, (a6) vsetvli zero, zero, e32, m1, ta, mu - vluxei64.v v11, (zero), v12 - vmsgt.vi v0, v11, 0 - vadd.vv v11, v11, v10 - vmerge.vim v12, v9, 1, v0 - vadd.vv v8, v8, v12 - vadd.vi v10, v11, -1, v0.t - sub t0, t0, a4 + vluxei64.v v12, (zero), v10 + vmsgt.vi v0, v12, 0 + vadd.vv v10, v12, v8 + vadd.vi v9, v9, 1, v0.t + vadd.vi v8, v10, -1, v0.t + sub t0, t0, a5 add a6, a6, a7 bnez t0, .LBB0_56 # %bb.57: # %middle.block169 - vmv.s.x v9, zero - vredsum.vs v10, v10, v9 - vmv.x.s a4, v10 - vredsum.vs v8, v8, v9 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 + vmv.x.s a5, v8 + vredsum.vs v8, v9, v10 vmv.x.s a6, v8 - beq a2, a5, .LBB0_60 + beq a2, a4, .LBB0_60 .LBB0_58: # %for.body199.preheader189 slli a2, a3, 3 add a2, s0, a2 @@ -462,11 +458,11 @@ negw a3, a3 and a0, a3, a0 addi a2, a2, 8 - add a4, a0, a4 + add a5, a0, a5 bne a2, s0, .LBB0_59 .LBB0_60: # %for.end218.loopexit fcvt.d.w fa5, a6 - fcvt.d.w fa4, a4 + fcvt.d.w fa4, a5 .LBB0_61: # %for.end218 lw a0, 0(s4) fcvt.d.w fa3, a0 --- build.a/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/grid/grid_reordering.s 2024-04-01 12:40:58.154483728 +0000 +++ build.b/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/grid/grid_reordering.s 2024-04-01 12:41:10.194147965 +0000 @@ -2249,18 +2249,16 @@ vslideup.vi v19, v16, 4 vmseq.vv v0, v17, v8 vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v16, v14, 1, v0 + vmerge.vim v20, v14, 1, v0 vsetvli zero, zero, e8, mf2, ta, ma vmseq.vv v0, v18, v9 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v20, v14, 1, v0 - vadd.vv v16, v20, v16 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v20, v20, 1, v0.t vsetvli zero, zero, e8, mf2, ta, ma vmseq.vv v0, v19, v10 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v18, v14, 1, v0 - vadd.vv v18, v16, v18 - vmseq.vi v16, v18, 3 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v20, v20, 1, v0.t + vmseq.vi v16, v20, 3 vsetvli zero, zero, e8, mf2, ta, ma vmv.x.s t2, v16 slli t3, t2, 62 --- build.a/MultiSource/Benchmarks/MallocBench/espresso/CMakeFiles/espresso.dir/pair.s 2024-04-01 12:41:02.862352433 +0000 +++ build.b/MultiSource/Benchmarks/MallocBench/espresso/CMakeFiles/espresso.dir/pair.s 2024-04-01 12:41:14.970014779 +0000 @@ -336,24 +336,23 @@ slli a2, a2, 31 sub a2, a2, a4 and a2, a2, a7 - vsetvli a4, zero, e32, m2, ta, ma - vmv.v.i v8, 0 slli a4, s6, 1 + vsetvli a5, zero, e32, m2, ta, ma + vmv.v.i v8, 0 mv a5, a2 mv a6, s10 - vmv.v.i v10, 0 .LBB1_53: # %vector.body # =>This Inner Loop Header: Depth=1 - vl2re32.v v12, (a6) - vmseq.vi v0, v12, 0 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vl2re32.v v10, (a6) + vsetvli zero, zero, e32, m2, ta, mu + vmseq.vi v0, v10, 0 + vadd.vi v8, v8, 1, v0.t sub a5, a5, a3 add a6, a6, a4 bnez a5, .LBB1_53 # %bb.54: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s5, v8 beq a2, a7, .LBB1_57 .LBB1_55: # %for.body60.preheader308 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/imbuf/intern/moviecache.s 2024-04-01 12:40:59.054458629 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/imbuf/intern/moviecache.s 2024-04-01 12:41:11.062123760 +0000 @@ -1137,56 +1137,53 @@ # %bb.17: # %if.end47.preheader addi a0, s6, -1 csrr a4, vlenb - srli a2, a4, 1 - li a1, 1 - bltu a0, a2, .LBB21_21 + srli a3, a4, 1 + li a2, 1 + bltu a0, a3, .LBB21_21 # %bb.18: # %vector.ph - neg a3, a2 - vsetvli a1, zero, e64, m4, ta, ma + neg a1, a3 + and a1, a0, a1 + vsetvli a2, zero, e64, m4, ta, ma vid.v v8 vadd.vi v8, v8, 1 - vsetvli a1, zero, e32, m1, ta, ma - vmv.v.i v16, 0 + vsetvli a2, zero, e32, m1, ta, ma + vmv.v.i v14, 0 vsetvli zero, zero, e32, m1, tu, ma - vmv.s.x v16, s7 - vsetvli a1, zero, e32, m2, ta, ma + vmv.s.x v14, s7 + vsetvli a2, zero, e32, m2, ta, ma vmv.v.i v12, 0 - and a3, a0, a3 - vmv.v.i v14, 0 - addi a1, a3, 1 - vmv1r.v v14, v16 + addi a2, a1, 1 + vmv1r.v v12, v14 slli a4, a4, 1 - mv a5, a3 + mv a5, a1 mv a6, s5 .LBB21_19: # %vector.body # =>This Inner Loop Header: Depth=1 addi a7, a6, 4 - vl2re32.v v16, (a7) - vl2re32.v v18, (a6) - vsetvli zero, zero, e32, m2, ta, ma - vsub.vv v16, v16, v18 - vmsne.vi v0, v16, 1 - vmerge.vim v16, v12, 1, v0 - vadd.vv v14, v14, v16 + vl2re32.v v14, (a7) + vl2re32.v v16, (a6) + vsetvli zero, zero, e32, m2, ta, mu + vsub.vv v14, v14, v16 + vmsne.vi v0, v14, 1 + vadd.vi v12, v12, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma vmseq.vx v0, v8, s9 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v16, v12, 1, v0 - vadd.vv v14, v14, v16 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v12, v12, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma - vadd.vx v8, v8, a2 - sub a5, a5, a2 + vadd.vx v8, v8, a3 + sub a5, a5, a3 add a6, a6, a4 bnez a5, .LBB21_19 # %bb.20: # %middle.block vmv.s.x v8, zero vsetvli zero, zero, e32, m2, ta, ma - vredsum.vs v8, v14, v8 + vredsum.vs v8, v12, v8 vmv.x.s s7, v8 - beq a0, a3, .LBB21_23 + beq a0, a1, .LBB21_23 .LBB21_21: # %if.end47.preheader101 - sub a0, s9, a1 - slli a1, a1, 2 + sub a0, s9, a2 + slli a1, a2, 2 add a1, s5, a1 slli a2, s6, 2 add a2, s5, a2 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/mesh_validate.s 2024-04-01 12:40:58.830464876 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/mesh_validate.s 2024-04-01 12:41:10.822130452 +0000 @@ -4976,32 +4976,31 @@ li a6, 4 li a7, 16 mv t0, s2 - vmv.v.i v16, 0 .LBB15_56: # %vector.body45 # =>This Inner Loop Header: Depth=1 - vmv2r.v v18, v8 + vmv2r.v v16, v8 vsetvli zero, zero, e64, m4, ta, ma vadd.vx v20, v12, t0 vsetvli zero, zero, e32, m2, ta, ma vluxei64.v v8, (a4), v20 vsetivli zero, 1, e32, m2, ta, ma - vslidedown.vx v18, v18, a3 + vslidedown.vx v16, v16, a3 vsetvli t1, zero, e32, m2, ta, ma - vslideup.vi v18, v8, 1 - vmseq.vv v0, v18, v8 - vluxei64.v v24, (a6), v20, v0.t - vluxei64.v v26, (a7), v20, v0.t - vmsne.vv v20, v18, v8 - vmsne.vv v18, v24, v26 - vmor.mm v0, v20, v18 - vmerge.vim v18, v10, 1, v0 - vadd.vv v16, v16, v18 + vslideup.vi v16, v8, 1 + vmseq.vv v0, v16, v8 + vluxei64.v v18, (a6), v20, v0.t + vluxei64.v v24, (a7), v20, v0.t + vmsne.vv v20, v16, v8 + vmsne.vv v16, v18, v24 + vmor.mm v0, v20, v16 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v10, v10, 1, v0.t add a5, a5, s9 add t0, t0, a2 bnez a5, .LBB15_56 # %bb.57: # %middle.block34 - vmv.s.x v10, zero - vredsum.vs v10, v16, v10 + vmv.s.x v12, zero + vredsum.vs v10, v10, v12 vmv.x.s s4, v10 srli a2, t2, 1 addi a2, a2, -1 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ira-color.s 2024-04-01 12:41:00.430420256 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ira-color.s 2024-04-01 12:41:12.490083938 +0000 @@ -9194,10 +9194,12 @@ li s3, 2 li s4, -1 vsetivli zero, 4, e32, m1, ta, ma - vmv.v.i v24, 0 + vmv.v.i v8, 0 + addi a1, sp, 544 + vs1r.v v8, (a1) # Unknown-size Folded Spill vsetvli zero, zero, e16, mf2, ta, ma vid.v v8 - vrsub.vi v25, v8, 3 + vrsub.vi v24, v8, 3 vsetvli zero, zero, e64, m2, ta, ma vmv.v.i v26, 1 csrr a1, vlenb @@ -9207,11 +9209,8 @@ addi a1, a1, 544 vs1r.v v24, (a1) # Unknown-size Folded Spill csrr a1, vlenb - slli a1, a1, 1 add a1, sp, a1 addi a1, a1, 544 - vs1r.v v25, (a1) # Unknown-size Folded Spill - addi a1, sp, 544 vs2r.v v26, (a1) # Unknown-size Folded Spill j .LBB25_117 .LBB25_113: # %if.then5.i.i @@ -9366,13 +9365,10 @@ ld a0, 176(sp) # 8-byte Folded Reload ld a0, %pcrel_lo(.Lpcrel_hi316)(a0) call bitmap_clear - addi a0, sp, 544 - vl2r.v v26, (a0) # Unknown-size Folded Reload csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 544 - vl1r.v v25, (a0) # Unknown-size Folded Reload + vl2r.v v26, (a0) # Unknown-size Folded Reload csrr a0, vlenb slli a1, a0, 1 add a0, a1, a0 @@ -9441,11 +9437,8 @@ addi a0, a0, 544 vl1r.v v24, (a0) # Unknown-size Folded Reload csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 544 - vl1r.v v25, (a0) # Unknown-size Folded Reload - addi a0, sp, 544 vl2r.v v26, (a0) # Unknown-size Folded Reload .LBB25_142: # %for.inc87.i.i.i # in Loop: Header=BB25_143 Depth=3 @@ -9529,11 +9522,8 @@ addi a0, a0, 544 vl1r.v v24, (a0) # Unknown-size Folded Reload csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 544 - vl1r.v v25, (a0) # Unknown-size Folded Reload - addi a0, sp, 544 vl2r.v v26, (a0) # Unknown-size Folded Reload li s4, 1 j .LBB25_142 @@ -9583,11 +9573,8 @@ addi a1, a1, 544 vl1r.v v24, (a1) # Unknown-size Folded Reload csrr a1, vlenb - slli a1, a1, 1 add a1, sp, a1 addi a1, a1, 544 - vl1r.v v25, (a1) # Unknown-size Folded Reload - addi a1, sp, 544 vl2r.v v26, (a1) # Unknown-size Folded Reload bnez a0, .LBB25_161 # %bb.160: # %if.then55.i.i.i @@ -9750,11 +9737,8 @@ addi a0, a0, 544 vl1r.v v24, (a0) # Unknown-size Folded Reload csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 544 - vl1r.v v25, (a0) # Unknown-size Folded Reload - addi a0, sp, 544 vl2r.v v26, (a0) # Unknown-size Folded Reload ld a4, 128(sp) # 8-byte Folded Reload j .LBB25_116 @@ -9848,8 +9832,9 @@ sub t1, t1, t3 add t1, t2, t1 addi t1, t1, 74 - vmv1r.v v9, v24 - vmv1r.v v10, v24 + addi t2, sp, 544 + vl1r.v v10, (t2) # Unknown-size Folded Reload + vmv1r.v v9, v10 .LBB25_189: # %vector.body # Parent Loop BB25_117 Depth=1 # => This Inner Loop Header: Depth=2 @@ -9857,8 +9842,8 @@ vsetivli zero, 4, e16, mf2, ta, ma vle16.v v8, (t2) vle16.v v11, (t0) - vrgather.vv v12, v8, v25 - vrgather.vv v8, v11, v25 + vrgather.vv v12, v8, v24 + vrgather.vv v8, v11, v24 vsetvli zero, zero, e64, m2, ta, ma vsext.vf4 v14, v12 vsext.vf4 v12, v8 @@ -9879,13 +9864,11 @@ vor.vv v12, v18, v12 vmsne.vi v0, v14, 0 vmsne.vi v8, v12, 0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v11, v24, 1, v0 - vadd.vv v9, v9, v11 - vmv1r.v v0, v8 - vmerge.vim v8, v24, 1, v0 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v9, v9, 1, v0.t addi t0, t0, -16 - vadd.vv v10, v10, v8 + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t bne t0, t1, .LBB25_189 # %bb.190: # %middle.block # in Loop: Header=BB25_117 Depth=1 @@ -9920,13 +9903,10 @@ addi a1, a1, %pcrel_lo(.Lpcrel_hi345) mv a4, s1 call fprintf - addi a0, sp, 544 - vl2r.v v26, (a0) # Unknown-size Folded Reload csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 544 - vl1r.v v25, (a0) # Unknown-size Folded Reload + vl2r.v v26, (a0) # Unknown-size Folded Reload csrr a0, vlenb slli a1, a0, 1 add a0, a1, a0 --- build.a/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/grid/tria.s 2024-04-01 12:40:58.174483170 +0000 +++ build.b/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/grid/tria.s 2024-04-01 12:41:10.214147408 +0000 @@ -19235,10 +19235,10 @@ vluxei64.v v10, (zero), v10 vand.vx v12, v12, t1 vsrl.vv v10, v10, v12 - vsetvli zero, zero, e32, m1, ta, ma - vnsrl.wi v12, v10, 0 - vand.vi v10, v12, 1 - vadd.vv v8, v8, v10 + vand.vi v10, v10, 1 + vmsne.vi v0, v10, 0 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t subw a7, a7, t0 vadd.vx v9, v9, t0 bnez a7, .LBB37_401 @@ -24053,62 +24053,61 @@ neg t0, t1 and t0, a0, t0 vsetvli t2, zero, e32, m1, ta, ma - vid.v v9 - vmv.v.i v10, 0 + vid.v v10 + vmv.v.i v9, 0 li t2, 63 mv t3, t0 - vmv.v.i v11, 0 .LBB46_6: # %vector.body # =>This Inner Loop Header: Depth=1 - vsrl.vi v8, v9, 1 + vsetvli zero, zero, e32, m1, ta, ma + vsrl.vi v8, v10, 1 vadd.vx v8, v8, a4 vsetvli zero, zero, e64, m2, ta, ma vzext.vf2 v12, v8 vsll.vi v12, v12, 2 vsetvli zero, zero, e32, m1, ta, ma vluxei64.v v8, (a5), v12 - vand.vi v12, v9, 1 - vadd.vv v12, v8, v12 - vsll.vi v8, v12, 2 + vand.vi v11, v10, 1 + vadd.vv v11, v8, v11 + vsll.vi v8, v11, 2 vsetvli zero, zero, e64, m2, ta, ma - vzext.vf2 v14, v8 - vsll.vi v14, v14, 2 + vzext.vf2 v12, v8 + vsll.vi v12, v12, 2 vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v8, (a6), v14 + vluxei64.v v8, (a6), v12 vmseq.vi v8, v8, -1 vsetvli zero, zero, e64, m2, ta, ma - vsext.vf2 v14, v12 + vsext.vf2 v12, v11 vmv1r.v v0, v8 - vlse64.v v16, (a7), zero, v0.t + vlse64.v v14, (a7), zero, v0.t vsetvli zero, zero, e32, m1, ta, ma - vsra.vi v13, v12, 31 - vsrl.vi v13, v13, 26 - vadd.vv v12, v12, v13 - vsra.vi v12, v12, 6 + vsra.vi v16, v11, 31 + vsrl.vi v16, v16, 26 + vadd.vv v11, v11, v16 + vsra.vi v11, v11, 6 vsetvli zero, zero, e64, m2, ta, mu - vand.vx v18, v14, a2 - vmsgtu.vx v0, v18, a3 - vsext.vf2 v18, v12 - vsll.vi v12, v18, 3 - vadd.vv v12, v16, v12 - vadd.vi v12, v12, -8, v0.t - vand.vx v14, v14, t2 + vand.vx v16, v12, a2 + vmsgtu.vx v0, v16, a3 + vsext.vf2 v16, v11 + vsll.vi v16, v16, 3 + vadd.vv v14, v14, v16 + vadd.vi v14, v14, -8, v0.t + vand.vx v12, v12, t2 vsetvli zero, zero, e64, m2, ta, ma vmv1r.v v0, v8 - vluxei64.v v12, (zero), v12, v0.t - vsrl.vv v12, v12, v14 + vluxei64.v v14, (zero), v14, v0.t + vsrl.vv v12, v14, v12 vand.vi v12, v12, 1 - vmsne.vi v14, v12, 0 - vmand.mm v0, v8, v14 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v8, v10, 1, v0 - vadd.vv v11, v11, v8 + vmsne.vi v11, v12, 0 + vmand.mm v0, v8, v11 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v9, v9, 1, v0.t subw t3, t3, t1 - vadd.vx v9, v9, t1 + vadd.vx v10, v10, t1 bnez t3, .LBB46_6 # %bb.7: # %middle.block vmv.s.x v8, zero - vredsum.vs v8, v11, v8 + vredsum.vs v8, v9, v8 vmv.x.s t1, v8 bne a0, t0, .LBB46_12 .LBB46_8: # %for.cond.cleanup --- build.a/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/partition.s 2024-04-01 12:41:02.786354552 +0000 +++ build.b/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/CMakeFiles/CLAMR.dir/partition.s 2024-04-01 12:41:14.894016898 +0000 @@ -1135,101 +1135,105 @@ vsetvli zero, zero, e64, m4, ta, ma vmsltu.vx v8, v16, a0 vmv1r.v v0, v8 - vle32.v v24, (s1), v0.t - vsetvli zero, zero, e32, m2, ta, ma - vmslt.vx v9, v24, t2 - vmsle.vv v26, v20, v24 - vmor.mm v0, v9, v26 - vmerge.vim v26, v10, 1, v0 - vadd.vv v26, v22, v26 + vle32.v v26, (s1), v0.t + vsetvli zero, zero, e32, m2, ta, mu + vmslt.vx v9, v26, t2 + vmsle.vv v24, v20, v26 + vmor.mm v0, v9, v24 + vmv2r.v v24, v22 + vadd.vi v24, v22, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma - vsext.vf2 v28, v24 + vsext.vf2 v28, v26 vsll.vi v28, v28, 2 vsetvli zero, zero, e32, m2, ta, ma vmv1r.v v0, v8 vluxei64.v v6, (a4), v28, v0.t - vle32.v v24, (s3), v0.t - vmslt.vv v9, v24, v6 + vle32.v v26, (s3), v0.t + vmslt.vv v9, v26, v6 vmand.mm v0, v8, v9 vluxei64.v v6, (a5), v28, v0.t vmslt.vx v9, v6, t2 vmsle.vv v28, v20, v6 vmor.mm v9, v9, v28 vmand.mm v0, v0, v9 - vmerge.vim v28, v10, 1, v0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v24, v24, 1, v0.t + vsetvli zero, zero, e32, m2, ta, ma vmv1r.v v0, v8 - vle32.v v30, (s5), v0.t - vmslt.vx v9, v30, t2 - vmsle.vv v7, v20, v30 - vmor.mm v0, v9, v7 - vadd.vv v26, v26, v28 - vmerge.vim v28, v10, 1, v0 - vadd.vv v26, v26, v28 + vle32.v v28, (s5), v0.t + vmslt.vx v9, v28, t2 + vmsle.vv v30, v20, v28 + vmor.mm v0, v9, v30 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v24, v24, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma - vsext.vf2 v4, v30 + vsext.vf2 v4, v28 vsll.vi v28, v4, 2 vsetvli zero, zero, e32, m2, ta, ma vmv1r.v v0, v8 vluxei64.v v6, (a4), v28, v0.t - vmslt.vv v9, v24, v6 + vmslt.vv v9, v26, v6 vmand.mm v0, v8, v9 vluxei64.v v6, (a5), v28, v0.t vmslt.vx v9, v6, t2 vmsle.vv v28, v20, v6 vmor.mm v9, v9, v28 vmand.mm v0, v0, v9 - vmerge.vim v28, v10, 1, v0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v24, v24, 1, v0.t + vsetvli zero, zero, e32, m2, ta, ma vmv1r.v v0, v8 - vle32.v v30, (s6), v0.t - vmslt.vx v9, v30, t2 - vmsle.vv v7, v20, v30 - vmor.mm v0, v9, v7 - vadd.vv v26, v26, v28 - vmerge.vim v28, v10, 1, v0 - vadd.vv v26, v26, v28 + vle32.v v28, (s6), v0.t + vmslt.vx v9, v28, t2 + vmsle.vv v30, v20, v28 + vmor.mm v0, v9, v30 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v24, v24, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma - vsext.vf2 v4, v30 + vsext.vf2 v4, v28 vsll.vi v28, v4, 2 vsetvli zero, zero, e32, m2, ta, ma vmv1r.v v0, v8 vluxei64.v v6, (a4), v28, v0.t - vmslt.vv v9, v24, v6 + vmslt.vv v9, v26, v6 vmand.mm v0, v8, v9 vluxei64.v v6, (a6), v28, v0.t vmslt.vx v9, v6, t2 vmsle.vv v28, v20, v6 vmor.mm v9, v9, v28 vmand.mm v0, v0, v9 - vmerge.vim v28, v10, 1, v0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v24, v24, 1, v0.t + vsetvli zero, zero, e32, m2, ta, ma vmv1r.v v0, v8 - vle32.v v30, (s7), v0.t - vmslt.vx v9, v30, t2 - vmsle.vv v7, v20, v30 - vmor.mm v0, v9, v7 - vadd.vv v26, v26, v28 - vmerge.vim v28, v10, 1, v0 - vadd.vv v26, v26, v28 + vle32.v v28, (s7), v0.t + vmslt.vx v9, v28, t2 + vmsle.vv v30, v20, v28 + vmor.mm v0, v9, v30 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v24, v24, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma - vsext.vf2 v4, v30 + vsext.vf2 v4, v28 vsll.vi v28, v4, 2 vsetvli zero, zero, e32, m2, ta, ma vmv1r.v v0, v8 vluxei64.v v6, (a4), v28, v0.t - vmslt.vv v9, v24, v6 + vmslt.vv v9, v26, v6 vmand.mm v9, v8, v9 vmv1r.v v0, v9 vluxei64.v v4, (a6), v28, v0.t - vmsle.vv v28, v6, v24 - vmslt.vx v24, v4, t2 - vmsle.vv v25, v20, v4 - vmor.mm v0, v24, v25 + vmsle.vv v28, v6, v26 + vmslt.vx v26, v4, t2 + vmsle.vv v27, v20, v4 + vmor.mm v0, v26, v27 + vsetvli zero, zero, e32, m2, ta, mu + vmv.v.v v26, v24 vmand.mm v8, v8, v28 - vmerge.vim v24, v10, 1, v0 + vadd.vi v26, v24, 1, v0.t vmv1r.v v0, v8 - vmerge.vvm v22, v22, v26, v0 - vsetvli zero, zero, e32, m2, ta, mu + vmerge.vvm v22, v22, v24, v0 vmv1r.v v0, v9 - vadd.vv v22, v26, v24, v0.t + vmerge.vvm v22, v22, v26, v0 vsetvli zero, zero, e64, m4, ta, ma vadd.vx v16, v16, t0 sub s8, s8, t0 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-dump.s 2024-04-01 12:41:00.510418025 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-dump.s 2024-04-01 12:41:12.574081595 +0000 @@ -5338,18 +5338,17 @@ bnez t1, .LBB20_24 # %bb.33: # %vector.ph srli a7, t0, 1 - vsetvli a5, zero, e64, m4, ta, ma + neg a5, a7 + vsetvli t1, zero, e64, m4, ta, ma vid.v v8 - vsetvli a5, zero, e32, m1, ta, ma - vmv.v.i v16, 0 + vsetvli t1, zero, e32, m1, ta, ma + vmv.v.i v14, 0 vsetvli zero, zero, e32, m1, tu, ma - vmv.s.x v16, a1 + vmv.s.x v14, a1 vsetvli a1, zero, e32, m2, ta, ma - vmv.v.i v14, 0 - neg a5, a7 vmv.v.i v12, 0 and a5, a3, a5 - vmv1r.v v12, v16 + vmv1r.v v12, v14 li a1, 20 mul a1, t0, a1 vsetvli zero, zero, e64, m4, ta, ma @@ -5357,23 +5356,22 @@ li t0, 40 li t1, 24 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v20, -1 + vmv.v.i v14, -1 li t2, 28 mv t3, a5 .LBB20_34: # %vector.body # =>This Inner Loop Header: Depth=1 vsetvli zero, zero, e64, m4, ta, ma - vmv4r.v v24, v8 - vmadd.vx v24, t0, v16 - vsetvli zero, zero, e32, m2, ta, ma - vluxei64.v v22, (t1), v24 - vand.vx v28, v22, a2 - vmsne.vi v0, v28, 0 - vsoxei64.v v20, (t2), v24, v0.t - vor.vx v22, v22, a0 - vsse32.v v22, (a6), t0, v0.t - vmerge.vim v22, v14, 1, v0 - vadd.vv v12, v12, v22 + vmv4r.v v20, v8 + vmadd.vx v20, t0, v16 + vsetvli zero, zero, e32, m2, ta, mu + vluxei64.v v24, (t1), v20 + vand.vx v26, v24, a2 + vmsne.vi v0, v26, 0 + vsoxei64.v v14, (t2), v20, v0.t + vor.vx v20, v24, a0 + vsse32.v v20, (a6), t0, v0.t + vadd.vi v12, v12, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma vadd.vx v8, v8, a7 sub t3, t3, a7 --- build.a/MultiSource/Benchmarks/NPB-serial/is/CMakeFiles/is.dir/is.s 2024-04-01 12:41:02.974349310 +0000 +++ build.b/MultiSource/Benchmarks/NPB-serial/is/CMakeFiles/is.dir/is.s 2024-04-01 12:41:15.062012213 +0000 @@ -391,24 +391,22 @@ vmv.v.i v10, 0 addi a6, a1, -1 mv a7, a2 - vmv.v.i v12, 0 .LBB2_3: # %vector.body # =>This Inner Loop Header: Depth=1 - vmv2r.v v14, v8 + vmv2r.v v12, v8 vl2re32.v v8, (a4) vsetivli zero, 1, e32, m2, ta, ma - vslidedown.vx v14, v14, a6 - vsetvli t0, zero, e32, m2, ta, ma - vslideup.vi v14, v8, 1 - vmslt.vv v0, v8, v14 - vmerge.vim v14, v10, 1, v0 - vadd.vv v12, v12, v14 + vslidedown.vx v12, v12, a6 + vsetvli t0, zero, e32, m2, ta, mu + vslideup.vi v12, v8, 1 + vmslt.vv v0, v8, v12 + vadd.vi v10, v10, 1, v0.t sub a7, a7, a1 add a4, a4, a5 bnez a7, .LBB2_3 # %bb.4: # %middle.block - vmv.s.x v10, zero - vredsum.vs v10, v12, v10 + vmv.s.x v12, zero + vredsum.vs v10, v10, v12 vmv.x.s a1, v10 vsetivli zero, 1, e32, m2, ta, ma vslidedown.vx v8, v8, a3 @@ -1062,24 +1060,22 @@ vmv.v.i v10, 0 addi a6, a1, -1 mv a7, a2 - vmv.v.i v12, 0 .LBB5_3: # %vector.body # =>This Inner Loop Header: Depth=1 - vmv2r.v v14, v8 + vmv2r.v v12, v8 vl2re32.v v8, (a4) vsetivli zero, 1, e32, m2, ta, ma - vslidedown.vx v14, v14, a6 - vsetvli t0, zero, e32, m2, ta, ma - vslideup.vi v14, v8, 1 - vmslt.vv v0, v8, v14 - vmerge.vim v14, v10, 1, v0 - vadd.vv v12, v12, v14 + vslidedown.vx v12, v12, a6 + vsetvli t0, zero, e32, m2, ta, mu + vslideup.vi v12, v8, 1 + vmslt.vv v0, v8, v12 + vadd.vi v10, v10, 1, v0.t sub a7, a7, a1 add a4, a4, a5 bnez a7, .LBB5_3 # %bb.4: # %middle.block - vmv.s.x v10, zero - vredsum.vs v10, v12, v10 + vmv.s.x v12, zero + vredsum.vs v10, v10, v12 vmv.x.s a1, v10 vsetivli zero, 1, e32, m2, ta, ma vslidedown.vx v8, v8, a3 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/omega.s 2024-04-01 12:41:00.450419698 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/omega.s 2024-04-01 12:41:12.510083380 +0000 @@ -1208,14 +1208,13 @@ mv a6, a7 .LBB6_16: # %vector.ph vsetvli a7, zero, e32, m1, ta, ma - vmv.v.i v12, 0 + vmv.v.i v10, 0 vsetvli zero, zero, e32, m1, tu, ma - vmv.s.x v12, a0 + vmv.s.x v10, a0 vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v10, 0 vmv.v.i v8, 0 sub a6, a3, a6 - vmv1r.v v8, v12 + vmv1r.v v8, v10 addi a0, a4, 8 li a7, 12 mul a7, a2, a7 @@ -1223,10 +1222,10 @@ mv t1, a6 .LBB6_17: # %vector.body # =>This Inner Loop Header: Depth=1 - vlse32.v v12, (a0), t0 - vmseq.vi v0, v12, 1 - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vsetvli zero, zero, e32, m2, ta, mu + vlse32.v v10, (a0), t0 + vmseq.vi v0, v10, 1 + vadd.vi v8, v8, 1, v0.t sub t1, t1, a5 add a0, a0, a7 bnez t1, .LBB6_17 @@ -11887,19 +11886,18 @@ vmv.v.i v8, 0 li a6, 24 mv a7, a2 - vmv.v.i v10, 0 .LBB18_17: # %vector.body # =>This Inner Loop Header: Depth=1 - vlse32.v v12, (a4), a6 - vmseq.vi v0, v12, 1 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vsetvli zero, zero, e32, m2, ta, mu + vlse32.v v10, (a4), a6 + vmseq.vi v0, v10, 1 + vadd.vi v8, v8, 1, v0.t sub a7, a7, a3 add a4, a4, a5 bnez a7, .LBB18_17 # %bb.18: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a3, v8 j .LBB18_21 .LBB18_19: # %if.then42 --- build.a/External/SPEC/CFP2017rate/511.povray_r/CMakeFiles/511.povray_r.dir/root/cpu2017/benchspec/CPU/511.povray_r/src/hfield.s 2024-04-01 12:40:58.578471904 +0000 +++ build.b/External/SPEC/CFP2017rate/511.povray_r/CMakeFiles/511.povray_r.dir/root/cpu2017/benchspec/CPU/511.povray_r/src/hfield.s 2024-04-01 12:41:10.570137480 +0000 @@ -2284,27 +2284,25 @@ vle32.v v9, (a0) vfncvt.rtz.x.f.w v10, v8 li s0, 1 - vmax.vx v8, v10, s0 - vadd.vi v9, v9, 2 - vfwcvt.f.x.v v10, v9 - vfwcvt.f.x.v v11, v8 + vmax.vx v11, v10, s0 + vadd.vi v8, v9, 2 + vfwcvt.f.x.v v9, v8 + vfwcvt.f.x.v v10, v11 vsetvli zero, zero, e64, m1, ta, ma - vfdiv.vv v10, v10, v11 - vsetvli zero, zero, e32, mf2, ta, ma + vfdiv.vv v9, v9, v10 + vsetvli zero, zero, e32, mf2, ta, mu fsrmi a0, 3 - vfncvt.x.f.w v11, v10 + vfncvt.x.f.w v10, v9 + lbu a1, 117(s11) fsrm a0 - lbu a0, 117(s11) - vmul.vv v10, v8, v11 - vmslt.vv v0, v10, v9 - vmv.v.i v9, 0 - andi a0, a0, 4 - vmerge.vim v9, v9, 1, v0 - beqz a0, .LBB10_65 + vmul.vv v9, v11, v10 + vmslt.vv v0, v9, v8 + andi a1, a1, 4 + vadd.vi v11, v11, 1, v0.t + beqz a1, .LBB10_65 # %bb.63: # %lor.lhs.false.i - vadd.vv v9, v8, v9 - vmv.x.s s3, v9 - vslidedown.vi v8, v9, 1 + vmv.x.s s3, v11 + vslidedown.vi v8, v11, 1 vmv.x.s a0, v8 bne s3, s0, .LBB10_67 # %bb.64: # %lor.lhs.false.i @@ -2398,14 +2396,14 @@ mv a1, s1 mv a3, s2 csrr a4, vlenb - slli a4, a4, 1 add a4, sp, a4 addi a4, a4, 96 vs1r.v v11, (a4) # Unknown-size Folded Spill csrr a4, vlenb + slli a4, a4, 1 add a4, sp, a4 addi a4, a4, 96 - vs1r.v v9, (a4) # Unknown-size Folded Spill + vs1r.v v10, (a4) # Unknown-size Folded Spill call _ZN3pov10pov_mallocEmPKciS1_ ld a1, 168(s11) li s4, 0 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/customdata.s 2024-04-01 12:40:58.806465545 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenkernel/intern/customdata.s 2024-04-01 12:41:10.798131122 +0000 @@ -2760,19 +2760,18 @@ li a6, 104 mv a7, a3 mv t0, a4 - vmv.v.i v10, 0 .LBB37_5: # %vector.body # =>This Inner Loop Header: Depth=1 - vlse32.v v12, (a7), a6 - vmseq.vx v0, v12, a1 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vsetvli zero, zero, e32, m2, ta, mu + vlse32.v v10, (a7), a6 + vmseq.vx v0, v10, a1 + vadd.vi v8, v8, 1, v0.t sub t0, t0, a0 add a7, a7, a5 bnez t0, .LBB37_5 # %bb.6: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a0, v8 beq a4, a2, .LBB37_9 .LBB37_7: # %for.body.preheader @@ -2832,11 +2831,14 @@ mv t0, a4 .LBB38_5: # %vector.body # =>This Inner Loop Header: Depth=1 - vsetvli zero, zero, e32, m2, ta, ma + vsetvli zero, zero, e64, m4, ta, ma vlse32.v v10, (a7), a6 - vnsrl.wv v16, v12, v10 - vand.vi v10, v16, 1 - vadd.vv v8, v8, v10 + vzext.vf2 v16, v10 + vsrl.vv v16, v12, v16 + vand.vi v16, v16, 1 + vmsne.vi v0, v16, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub t0, t0, a0 add a7, a7, a5 bnez t0, .LBB38_5 @@ -7069,11 +7071,14 @@ mv t3, a4 .LBB65_4: # %vector.body # =>This Inner Loop Header: Depth=1 - vsetvli zero, zero, e32, m2, ta, ma + vsetvli zero, zero, e64, m4, ta, ma vlse32.v v10, (t2), t1 - vnsrl.wv v16, v12, v10 - vand.vi v10, v16, 1 - vadd.vv v8, v8, v10 + vzext.vf2 v16, v10 + vsrl.vv v16, v12, v16 + vand.vi v16, v16, 1 + vmsne.vi v0, v16, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub t3, t3, a7 add t2, t2, t0 bnez t3, .LBB65_4 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/reload.s 2024-04-01 12:41:00.478418917 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/reload.s 2024-04-01 12:41:12.542082487 +0000 @@ -9405,8 +9405,6 @@ vsetvli a0, zero, e32, m2, ta, ma vmv.v.i v8, 0 csrr a0, vlenb - slli a2, a0, 1 - add a0, a2, a0 add a0, sp, a0 lui a2, 1 addiw a2, a2, 16 @@ -9415,6 +9413,8 @@ vsetvli zero, zero, e8, mf2, ta, ma vmv.v.i v8, 0 csrr a0, vlenb + slli a2, a0, 1 + add a0, a2, a0 add a0, sp, a0 lui a2, 1 addiw a2, a2, 16 @@ -9654,18 +9654,18 @@ # Parent Loop BB23_104 Depth=2 # Parent Loop BB23_109 Depth=3 # => This Inner Loop Header: Depth=4 - ld a4, 8(s7) - lwu a2, 0(a4) - and a1, a2, s4 + ld a2, 8(s7) + lwu a4, 0(a2) + and a1, a4, s4 bne a1, s5, .LBB23_121 # %bb.117: # %land.lhs.true655 # in Loop: Header=BB23_116 Depth=4 - lw a0, 8(a4) + lw a0, 8(a2) li a1, 37 bltu s10, a0, .LBB23_121 # %bb.118: # %if.then661 # in Loop: Header=BB23_116 Depth=4 - slli a1, a2, 40 + slli a1, a4, 40 lw a2, 16(s7) srli a1, a1, 56 slli a3, a3, 40 @@ -9691,8 +9691,8 @@ and a1, a3, s4 j .LBB23_122 .LBB23_121: # in Loop: Header=BB23_116 Depth=4 - mv a3, a2 - mv s7, a4 + mv a3, a4 + mv s7, a2 .LBB23_122: # %if.end695 # in Loop: Header=BB23_116 Depth=4 slli a0, a1, 2 @@ -9935,14 +9935,6 @@ add a0, a1, a0 lbu a0, 0(a0) mv a6, s2 - csrr a1, vlenb - slli a2, a1, 1 - add a1, a2, a1 - add a1, sp, a1 - lui a2, 1 - addiw a2, a2, 16 - add a1, a1, a2 - vl2r.v v8, (a1) # Unknown-size Folded Reload beqz a0, .LBB23_172 .LBB23_154: # %if.then853 # in Loop: Header=BB23_148 Depth=4 @@ -10063,14 +10055,6 @@ sub a0, a0, s10 .LBB23_171: # %cond.true813 # in Loop: Header=BB23_148 Depth=4 - csrr a1, vlenb - slli a2, a1, 1 - add a1, a2, a1 - add a1, sp, a1 - lui a2, 1 - addiw a2, a2, 16 - add a1, a1, a2 - vl2r.v v8, (a1) # Unknown-size Folded Reload sext.w a0, a0 li a1, 30 mul a0, a0, a1 @@ -10153,14 +10137,6 @@ snez a0, a0 subw a0, s4, a0 sd a0, 608(sp) # 8-byte Folded Spill - csrr a0, vlenb - slli a1, a0, 1 - add a0, a1, a0 - add a0, sp, a0 - lui a1, 1 - addiw a1, a1, 16 - add a0, a0, a1 - vl2r.v v8, (a0) # Unknown-size Folded Reload ld s4, 592(sp) # 8-byte Folded Reload ld a2, 632(sp) # 8-byte Folded Reload sw s11, 0(s5) @@ -10191,6 +10167,12 @@ addi a1, a1, 705 vsetvli a2, zero, e32, m2, ta, ma mv a2, a0 + csrr a3, vlenb + add a3, sp, a3 + lui a4, 1 + addiw a4, a4, 16 + add a3, a3, a4 + vl2r.v v8, (a3) # Unknown-size Folded Reload ld a3, 376(sp) # 8-byte Folded Reload .LBB23_185: # %vector.body2208 # Parent Loop BB23_101 Depth=1 @@ -11321,13 +11303,7 @@ lui a2, 1 addiw a2, a2, 16 add a1, a1, a2 - vl2r.v v16, (a1) # Unknown-size Folded Reload - csrr a1, vlenb - add a1, sp, a1 - lui a2, 1 - addiw a2, a2, 16 - add a1, a1, a2 - vl1r.v v8, (a1) # Unknown-size Folded Reload + vl1r.v v16, (a1) # Unknown-size Folded Reload beqz a0, .LBB23_349 .LBB23_361: # %if.then1768 # in Loop: Header=BB23_351 Depth=4 @@ -11413,13 +11389,7 @@ lui a1, 1 addiw a1, a1, 16 add a0, a0, a1 - vl2r.v v16, (a0) # Unknown-size Folded Reload - csrr a0, vlenb - add a0, sp, a0 - lui a1, 1 - addiw a1, a1, 16 - add a0, a0, a1 - vl1r.v v8, (a0) # Unknown-size Folded Reload + vl1r.v v16, (a0) # Unknown-size Folded Reload ld a0, 632(sp) # 8-byte Folded Reload bge s10, a0, .LBB23_382 # %bb.372: # %if.end72.i @@ -11453,13 +11423,7 @@ lui a3, 1 addiw a3, a3, 16 add a2, a2, a3 - vl2r.v v16, (a2) # Unknown-size Folded Reload - csrr a2, vlenb - add a2, sp, a2 - lui a3, 1 - addiw a3, a3, 16 - add a2, a2, a3 - vl1r.v v8, (a2) # Unknown-size Folded Reload + vl1r.v v16, (a2) # Unknown-size Folded Reload ld s10, 448(sp) # 8-byte Folded Reload beq a0, a1, .LBB23_349 # %bb.375: # %land.lhs.true38.i @@ -11488,13 +11452,7 @@ lui a3, 1 addiw a3, a3, 16 add a2, a2, a3 - vl2r.v v16, (a2) # Unknown-size Folded Reload - csrr a2, vlenb - add a2, sp, a2 - lui a3, 1 - addiw a3, a3, 16 - add a2, a2, a3 - vl1r.v v8, (a2) # Unknown-size Folded Reload + vl1r.v v16, (a2) # Unknown-size Folded Reload ld s10, 448(sp) # 8-byte Folded Reload bne a0, a1, .LBB23_361 # %bb.379: # %land.lhs.true58.i @@ -11551,8 +11509,12 @@ add a1, sp, a1 vl1r.v v10, (a1) # Unknown-size Folded Reload vmv.s.x v10, a3 - vmv1r.v v18, v8 - vmv2r.v v8, v16 + csrr a1, vlenb + add a1, sp, a1 + lui a2, 1 + addiw a2, a2, 16 + add a1, a1, a2 + vl2r.v v8, (a1) # Unknown-size Folded Reload and a0, a0, s10 vmv1r.v v8, v10 addi a1, sp, 2047 @@ -11576,11 +11538,10 @@ vsetvli zero, zero, e8, mf2, ta, ma vmsne.vi v10, v10, 0 vmand.mm v0, v0, v10 - vse8.v v18, (a3), v0.t - vse8.v v18, (a2), v0.t - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v10, v16, 1, v0 - vadd.vv v8, v8, v10 + vse8.v v16, (a3), v0.t + vse8.v v16, (a2), v0.t + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t add a1, a1, a7 add a2, a2, a6 sub a4, a4, a6 @@ -14500,18 +14461,18 @@ ret .LBB23_803: # %for.body45.lr.ph.i li s5, 0 - li s2, 0 + li s9, 0 addi s7, s3, 48 - li s9, 9 + li s10, 9 .Lpcrel_hi314: auipc a0, %got_pcrel_hi(mode_class) - ld s10, %pcrel_lo(.Lpcrel_hi314)(a0) + ld s11, %pcrel_lo(.Lpcrel_hi314)(a0) .Lpcrel_hi315: auipc a0, %pcrel_hi(secondary_memlocs_elim) addi s6, a0, %pcrel_lo(.Lpcrel_hi315) .Lpcrel_hi316: auipc a0, %got_pcrel_hi(mode_size) - ld s11, %pcrel_lo(.Lpcrel_hi316)(a0) + ld s2, %pcrel_lo(.Lpcrel_hi316)(a0) li s8, 608 j .LBB23_806 .LBB23_804: # %land.lhs.true335.i @@ -14526,10 +14487,10 @@ .LBB23_805: # %for.inc477.i # in Loop: Header=BB23_806 Depth=1 lw a0, %pcrel_lo(.Lpcrel_hi225)(a6) - addi s2, s2, 1 + addi s9, s9, 1 addi s5, s5, 1 addi s7, s7, 104 - bge s2, a0, .LBB23_839 + bge s9, a0, .LBB23_839 .LBB23_806: # %for.body45.i # =>This Inner Loop Header: Depth=1 ld a0, -48(s7) @@ -14542,7 +14503,7 @@ # %bb.808: # %land.lhs.true65.i # in Loop: Header=BB23_806 Depth=1 lw a0, 44(s7) - bltu s9, a0, .LBB23_810 + bltu s10, a0, .LBB23_810 # %bb.809: # %land.lhs.true65.i # in Loop: Header=BB23_806 Depth=1 srl a0, s8, a0 @@ -14562,7 +14523,7 @@ beq a1, a2, .LBB23_813 # %bb.812: # %cond.false105.i # in Loop: Header=BB23_806 Depth=1 - add a1, s11, a1 + add a1, s2, a1 lbu a0, 0(a1) .LBB23_813: # %cond.end.i1247 # in Loop: Header=BB23_806 Depth=1 @@ -14571,7 +14532,7 @@ j .LBB23_815 .LBB23_814: # %cond.true.i1249 # in Loop: Header=BB23_806 Depth=1 - add a1, s10, a1 + add a1, s11, a1 lbu a0, 0(a1) andi a0, a0, 254 addi a0, a0, -10 @@ -14592,7 +14553,7 @@ beq a1, a2, .LBB23_818 # %bb.817: # %cond.false147.i # in Loop: Header=BB23_806 Depth=1 - add a0, s11, a1 + add a0, s2, a1 lbu a0, 0(a0) .LBB23_818: # %cond.end155.i # in Loop: Header=BB23_806 Depth=1 @@ -14601,7 +14562,7 @@ j .LBB23_820 .LBB23_819: # %cond.true121.i # in Loop: Header=BB23_806 Depth=1 - add a0, s10, a1 + add a0, s11, a1 lbu a0, 0(a0) andi a0, a0, 254 addi a0, a0, -10 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ifcvt.s 2024-04-01 12:41:00.354422375 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ifcvt.s 2024-04-01 12:41:12.406086280 +0000 @@ -3283,22 +3283,20 @@ neg a1, a1 and a1, a1, a0 vsetvli a5, zero, e32, m1, ta, ma - vmv.v.i v9, 0 - vsetvli zero, zero, e32, m1, tu, ma vmv.v.i v8, 0 + vsetvli zero, zero, e32, m1, tu, ma vmv.s.x v8, s1 mv a5, a1 .LBB10_25: # %vector.body # =>This Inner Loop Header: Depth=1 - vl1re32.v v10, (a4) + vl1re32.v v9, (a4) vsetvli zero, zero, e64, m2, ta, ma - vsext.vf2 v12, v10 - vsll.vi v10, v12, 3 + vsext.vf2 v10, v9 + vsll.vi v10, v10, 3 vluxei64.v v10, (s10), v10 vmseq.vi v0, v10, 0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v10, v9, 1, v0 - vadd.vv v8, v8, v10 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, a3 add a4, a4, a2 bnez a5, .LBB10_25 --- build.a/MultiSource/Benchmarks/Prolangs-C/bison/CMakeFiles/mybison.dir/output.s 2024-04-01 12:41:02.990348863 +0000 +++ build.b/MultiSource/Benchmarks/Prolangs-C/bison/CMakeFiles/mybison.dir/output.s 2024-04-01 12:41:15.082011656 +0000 @@ -1771,18 +1771,16 @@ sub a2, a2, a4 and a2, a2, a0 vsetvli a4, zero, e32, m2, ta, ma - vmv.v.i v10, 0 + vmv.v.i v8, 0 mv a4, a2 mv a5, a1 - vmv.v.i v8, 0 .LBB12_4: # %vector.body # =>This Inner Loop Header: Depth=1 - vl1re16.v v12, (a5) + vl1re16.v v10, (a5) vsetvli zero, zero, e16, m1, ta, ma - vmsne.vi v0, v12, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vmsne.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a4, a4, a3 add a5, a5, s6 bnez a4, .LBB12_4 @@ -1883,10 +1881,10 @@ auipc a0, %pcrel_hi(tos) sd a0, 32(sp) # 8-byte Folded Spill vsetvli a0, zero, e32, m2, ta, ma - vmv.v.i v12, 0 - li s10, 10 + vmv.v.i v8, 0 addi a0, sp, 80 - vs2r.v v12, (a0) # Unknown-size Folded Spill + vs2r.v v8, (a0) # Unknown-size Folded Spill + li s10, 10 j .LBB12_19 .LBB12_17: # %for.end35.i25 # in Loop: Header=BB12_19 Depth=1 @@ -1938,8 +1936,6 @@ mv a1, s0 call fprintf lw a0, 0(s2) - addi a1, sp, 80 - vl2r.v v12, (a1) # Unknown-size Folded Reload blez a0, .LBB12_18 # %bb.23: # %for.body.lr.ph.i7 # in Loop: Header=BB12_19 Depth=1 @@ -1957,16 +1953,16 @@ and a2, a2, a0 mv a3, a2 mv a4, a1 - vmv2r.v v8, v12 + addi a5, sp, 80 + vl2r.v v8, (a5) # Unknown-size Folded Reload .LBB12_26: # %vector.body61 # Parent Loop BB12_19 Depth=1 # => This Inner Loop Header: Depth=2 vl1re16.v v10, (a4) vsetvli a5, zero, e16, m1, ta, ma vmsne.vi v0, v10, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v10, v12, 1, v0 - vadd.vv v8, v8, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a3, a3, s11 add a4, a4, s6 bnez a3, .LBB12_26 @@ -3674,10 +3670,10 @@ # Parent Loop BB21_47 Depth=1 # => This Inner Loop Header: Depth=2 vl1re16.v v12, (s2) + vsetvli zero, zero, e32, m2, ta, mu vsext.vf2 v14, v12 vmseq.vx v0, v14, a0 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vadd.vi v10, v10, 1, v0.t sub s0, s0, t3 add s2, s2, a4 bnez s0, .LBB21_50 @@ -3807,14 +3803,14 @@ # %bb.1: # %for.body.lr.ph mv s0, a0 .Lpcrel_hi215: - auipc s4, %pcrel_hi(actrow) - ld a0, %pcrel_lo(.Lpcrel_hi215)(s4) + auipc s3, %pcrel_hi(actrow) + ld a0, %pcrel_lo(.Lpcrel_hi215)(s3) csrr a3, vlenb srli a4, a3, 1 bgeu a1, a4, .LBB22_3 # %bb.2: li a2, 0 - li s3, 0 + li s4, 0 j .LBB22_6 .LBB22_3: # %vector.ph srli a2, a3, 3 @@ -3823,25 +3819,23 @@ sub a2, a2, a5 and a2, a2, a1 vsetvli a5, zero, e32, m2, ta, ma - vmv.v.i v10, 0 + vmv.v.i v8, 0 mv a5, a2 mv a6, a0 - vmv.v.i v8, 0 .LBB22_4: # %vector.body # =>This Inner Loop Header: Depth=1 - vl1re16.v v12, (a6) + vl1re16.v v10, (a6) vsetvli zero, zero, e16, m1, ta, ma - vmsne.vi v0, v12, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vmsne.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a5, a5, a4 add a6, a6, a3 bnez a5, .LBB22_4 # %bb.5: # %middle.block vmv.s.x v10, zero vredsum.vs v8, v8, v10 - vmv.x.s s3, v8 + vmv.x.s s4, v8 beq a2, a1, .LBB22_8 .LBB22_6: # %for.body.preheader slli a2, a2, 1 @@ -3853,12 +3847,12 @@ lhu a1, 0(a2) snez a1, a1 addi a2, a2, 2 - addw s3, s3, a1 + addw s4, s4, a1 bne a2, a0, .LBB22_7 .LBB22_8: # %for.end - beqz s3, .LBB22_15 + beqz s4, .LBB22_15 # %bb.9: # %if.end7 - slliw s2, s3, 1 + slliw s2, s4, 1 mv a0, s2 call mallocate .Lpcrel_hi216: @@ -3879,7 +3873,7 @@ mv a2, s1 blez a1, .LBB22_14 # %bb.10: # %for.body21.lr.ph - ld a3, %pcrel_lo(.Lpcrel_hi215)(s4) + ld a3, %pcrel_lo(.Lpcrel_hi215)(s3) li a4, 0 mv a2, s1 j .LBB22_12 @@ -3907,7 +3901,7 @@ ld a0, %pcrel_lo(.Lpcrel_hi218)(a0) slli s0, s0, 1 add a0, a0, s0 - sh s3, 0(a0) + sh s4, 0(a0) lh a0, -2(a2) lh a1, 0(s1) .Lpcrel_hi219: @@ -4101,24 +4095,23 @@ and a5, a3, a5 add a4, a5, s3 slli t0, s3, 1 + add t0, a2, t0 vsetvli t1, zero, e32, m2, ta, ma vmv.v.i v8, 0 - add t0, a2, t0 mv t1, a5 - vmv.v.i v10, 0 .LBB24_4: # %vector.body # =>This Inner Loop Header: Depth=1 - vl1re16.v v12, (t0) - vsext.vf2 v14, v12 - vmsne.vx v0, v14, a1 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vl1re16.v v10, (t0) + vsetvli zero, zero, e32, m2, ta, mu + vsext.vf2 v12, v10 + vmsne.vx v0, v12, a1 + vadd.vi v8, v8, 1, v0.t sub t1, t1, a7 add t0, t0, a6 bnez t1, .LBB24_4 # %bb.5: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s4, v8 beq a3, a5, .LBB24_8 .LBB24_6: # %for.body.preheader --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/render/intern/source/initrender.s 2024-04-01 12:40:59.094457514 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/render/intern/source/initrender.s 2024-04-01 12:41:11.106122532 +0000 @@ -694,76 +694,52 @@ vid.v v24 li a2, 127 add a3, s5, s5 - vmv.v.i v26, 0 - csrr a4, vlenb - slli a4, a4, 1 - add a4, sp, a4 - addi a4, a4, 352 - vs2r.v v26, (a4) # Unknown-size Folded Spill mv a4, a0 mv a5, a7 + vmv.v.v v26, v24 vmv.v.v v28, v24 vmv.v.v v30, v24 vmv.v.v v6, v24 vmv.v.v v4, v24 - vmv.v.v v2, v24 .LBB2_10: # %vector.body # =>This Inner Loop Header: Depth=1 - vand.vi v0, v24, 1 - vsrl.vi v26, v24, 1 - vand.vi v26, v26, 1 - vadd.vv v26, v0, v26 - vsrl.vi v0, v28, 2 - vand.vi v0, v0, 1 - vadd.vv v26, v26, v0 - vsrl.vi v0, v30, 3 - vand.vi v0, v0, 1 - vadd.vv v26, v26, v0 - vsrl.vi v0, v6, 4 - vand.vi v0, v0, 1 - vadd.vv v26, v26, v0 - vsrl.vi v0, v4, 5 - vand.vi v0, v0, 1 - vadd.vv v26, v26, v0 - vsrl.vi v0, v2, 6 - vand.vi v0, v0, 1 - vadd.vv v26, v26, v0 - csrr a6, vlenb - li t0, 6 - mul a6, a6, t0 - add a6, sp, a6 - addi a6, a6, 352 - vs2r.v v26, (a6) # Unknown-size Folded Spill + vand.vi v2, v24, 1 + vsrl.vi v0, v24, 1 + vand.vi v0, v0, 1 + vadd.vv v2, v2, v0 + vsrl.vi v0, v26, 2 + vand.vi v0, v0, 1 + vadd.vv v2, v2, v0 + vsrl.vi v0, v28, 3 + vand.vi v0, v0, 1 + vadd.vv v2, v2, v0 + vsrl.vi v0, v30, 4 + vand.vi v0, v0, 1 + vadd.vv v2, v2, v0 + vsrl.vi v0, v6, 5 + vand.vi v0, v0, 1 + vadd.vv v2, v2, v0 + vsrl.vi v0, v4, 6 + vand.vi v0, v0, 1 + vadd.vv v2, v2, v0 vsetvli a6, zero, e64, m8, ta, ma vmsgtu.vx v0, v8, a2 vmsgtu.vx v1, v16, a2 vsetvli zero, a3, e8, mf4, ta, ma vslideup.vx v0, v1, s5 - vsetvli a6, zero, e8, m2, ta, ma - csrr a6, vlenb - slli a6, a6, 1 - add a6, sp, a6 - addi a6, a6, 352 - vl2r.v v26, (a6) # Unknown-size Folded Reload - vmerge.vim v26, v26, 1, v0 - csrr a6, vlenb - li t0, 6 - mul a6, a6, t0 - add a6, sp, a6 - addi a6, a6, 352 - vl2r.v v0, (a6) # Unknown-size Folded Reload - vadd.vv v26, v0, v26 - vs2r.v v26, (a5) + vsetvli a6, zero, e8, m2, ta, mu + vadd.vi v2, v2, 1, v0.t + vs2r.v v2, (a5) vsetvli a6, zero, e64, m8, ta, ma vadd.vx v8, v8, a1 vadd.vx v16, v16, a1 vsetvli a6, zero, e8, m2, ta, ma vadd.vx v24, v24, a1 + vadd.vx v26, v26, a1 vadd.vx v28, v28, a1 vadd.vx v30, v30, a1 vadd.vx v6, v6, a1 vadd.vx v4, v4, a1 - vadd.vx v2, v2, a1 sub a4, a4, a1 add a5, a5, a1 bnez a4, .LBB2_10 --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenlib/intern/graph.s 2024-04-01 12:40:58.862463984 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/blenlib/intern/graph.s 2024-04-01 12:41:10.858129449 +0000 @@ -1876,7 +1876,8 @@ srli s7, a0, 2 vsetivli zero, 2, e32, mf2, ta, ma vmv.v.i v8, 0 - addi a0, sp, 176 + add a0, sp, a0 + addi a0, a0, 176 vs1r.v v8, (a0) # Unknown-size Folded Spill fmv.w.x fs1, zero addi s9, sp, 112 @@ -1887,16 +1888,14 @@ fmv.w.x fs3, a0 li s11, 1 vsetvli a0, zero, e32, m1, ta, ma - vmv.v.i v12, 0 + vmv.v.i v8, 0 + addi a0, sp, 176 + vs1r.v v8, (a0) # Unknown-size Folded Spill li a0, -1 srli a0, a0, 32 sd a0, 40(sp) # 8-byte Folded Spill li s10, 3 slli s10, s10, 35 - csrr a0, vlenb - add a0, sp, a0 - addi a0, a0, 176 - vs1r.v v12, (a0) # Unknown-size Folded Spill sd s5, 64(sp) # 8-byte Folded Spill sd s6, 56(sp) # 8-byte Folded Spill sd s7, 48(sp) # 8-byte Folded Spill @@ -1936,7 +1935,9 @@ add a2, a0, a2 mv a3, a0 fmv.s fa5, fs1 - addi a4, sp, 176 + csrr a4, vlenb + add a4, sp, a4 + addi a4, a4, 176 vl1r.v v8, (a4) # Unknown-size Folded Reload j .LBB26_34 .LBB26_32: # %if.then.i @@ -1975,7 +1976,9 @@ j .LBB26_33 .LBB26_37: # in Loop: Header=BB26_29 Depth=1 fmv.s fa5, fs1 - addi a2, sp, 176 + csrr a2, vlenb + add a2, sp, a2 + addi a2, a2, 176 vl1r.v v8, (a2) # Unknown-size Folded Reload .LBB26_38: # %for.end.i # in Loop: Header=BB26_29 Depth=1 @@ -1997,7 +2000,9 @@ j .LBB26_41 .LBB26_40: # in Loop: Header=BB26_29 Depth=1 fmv.s fa5, fs1 - addi a2, sp, 176 + csrr a2, vlenb + add a2, sp, a2 + addi a2, a2, 176 vl1r.v v8, (a2) # Unknown-size Folded Reload .LBB26_41: # %normalize_v3.exit # in Loop: Header=BB26_29 Depth=1 @@ -2068,18 +2073,18 @@ and a1, a1, a6 mv a2, a1 mv a3, a0 - vmv1r.v v8, v12 + addi a4, sp, 176 + vl1r.v v8, (a4) # Unknown-size Folded Reload ld a5, 24(sp) # 8-byte Folded Reload li a7, 40 .LBB26_56: # %vector.body # Parent Loop BB26_29 Depth=1 # => This Inner Loop Header: Depth=2 vl2re64.v v10, (a3) - vsetvli a4, zero, e32, m1, ta, ma + vsetvli a4, zero, e32, m1, ta, mu vluxei64.v v9, (a7), v10 vmseq.vx v0, v9, s0 - vmerge.vim v9, v12, 1, v0 - vadd.vv v8, v8, v9 + vadd.vi v8, v8, 1, v0.t sub a2, a2, s7 add a3, a3, a5 bnez a2, .LBB26_56 @@ -2200,7 +2205,9 @@ bnez a0, .LBB26_62 # %bb.69: # in Loop: Header=BB26_65 Depth=2 fmv.s fa5, fs1 - addi a0, sp, 176 + csrr a0, vlenb + add a0, sp, a0 + addi a0, a0, 176 vl1r.v v8, (a0) # Unknown-size Folded Reload j .LBB26_63 .LBB26_70: # %for.cond36.preheader.i @@ -2425,10 +2432,6 @@ ld a1, 0(a0) mv a0, s9 jalr a1 - csrr a0, vlenb - add a0, sp, a0 - addi a0, a0, 176 - vl1r.v v12, (a0) # Unknown-size Folded Reload ld s5, 64(sp) # 8-byte Folded Reload ld s4, 80(sp) # 8-byte Folded Reload ld s6, 56(sp) # 8-byte Folded Reload @@ -2463,10 +2466,6 @@ mv a3, s6 fmv.s fa0, fs0 call markdownSymmetryArc - csrr a0, vlenb - add a0, sp, a0 - addi a0, a0, 176 - vl1r.v v12, (a0) # Unknown-size Folded Reload lw a6, 32(s2) j .LBB26_101 .LBB26_104: # %if.else.i80 @@ -2489,10 +2488,6 @@ mv a1, s2 fmv.s fa0, fs0 call testAxialSymmetry - csrr a0, vlenb - add a0, sp, a0 - addi a0, a0, 176 - vl1r.v v12, (a0) # Unknown-size Folded Reload j .LBB26_99 .LBB26_109: # %for.end68 csrr a0, vlenb --- build.a/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/object/object_hook.s 2024-04-01 12:40:58.958461307 +0000 +++ build.b/External/SPEC/CFP2017rate/526.blender_r/CMakeFiles/526.blender_r.dir/root/cpu2017/benchspec/CPU/526.blender_r/src/blender/source/blender/editors/object/object_hook.s 2024-04-01 12:41:10.962126549 +0000 @@ -2149,33 +2149,31 @@ mul a6, s5, a6 vsetvli t0, zero, e64, m4, ta, ma vid.v v8 - vmul.vx v8, v8, a7 + vmul.vx v12, v8, a7 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v12, 0 + vmv.v.i v8, 0 li a7, 24 li t0, 26 mv t1, a4 - vmv.v.i v14, 0 .LBB17_63: # %vector.body # =>This Inner Loop Header: Depth=1 vsetvli zero, zero, e64, m4, ta, ma - vadd.vx v16, v8, a3 + vadd.vx v16, v12, a3 vsetvli zero, zero, e16, m1, ta, ma - vluxei64.v v20, (a7), v16 - vand.vi v20, v20, 1 - vmsne.vi v0, v20, 0 - vluxei64.v v20, (t0), v16, v0.t - vmseq.vi v16, v20, 0 - vmand.mm v0, v0, v16 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v16, v12, 1, v0 - vadd.vv v14, v14, v16 + vluxei64.v v10, (a7), v16 + vand.vi v10, v10, 1 + vmsne.vi v0, v10, 0 + vluxei64.v v10, (t0), v16, v0.t + vmseq.vi v10, v10, 0 + vmand.mm v0, v0, v10 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub t1, t1, a5 add a3, a3, a6 bnez t1, .LBB17_63 # %bb.64: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v14, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s s4, v8 bne a4, a2, .LBB17_72 .LBB17_65: # %while.end.i --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ira-color.s 2024-04-01 12:40:59.574444128 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ira-color.s 2024-04-01 12:41:11.610108478 +0000 @@ -9194,10 +9194,12 @@ li s3, 2 li s4, -1 vsetivli zero, 4, e32, m1, ta, ma - vmv.v.i v24, 0 + vmv.v.i v8, 0 + addi a1, sp, 544 + vs1r.v v8, (a1) # Unknown-size Folded Spill vsetvli zero, zero, e16, mf2, ta, ma vid.v v8 - vrsub.vi v25, v8, 3 + vrsub.vi v24, v8, 3 vsetvli zero, zero, e64, m2, ta, ma vmv.v.i v26, 1 csrr a1, vlenb @@ -9207,11 +9209,8 @@ addi a1, a1, 544 vs1r.v v24, (a1) # Unknown-size Folded Spill csrr a1, vlenb - slli a1, a1, 1 add a1, sp, a1 addi a1, a1, 544 - vs1r.v v25, (a1) # Unknown-size Folded Spill - addi a1, sp, 544 vs2r.v v26, (a1) # Unknown-size Folded Spill j .LBB25_117 .LBB25_113: # %if.then5.i.i @@ -9366,13 +9365,10 @@ ld a0, 176(sp) # 8-byte Folded Reload ld a0, %pcrel_lo(.Lpcrel_hi316)(a0) call bitmap_clear - addi a0, sp, 544 - vl2r.v v26, (a0) # Unknown-size Folded Reload csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 544 - vl1r.v v25, (a0) # Unknown-size Folded Reload + vl2r.v v26, (a0) # Unknown-size Folded Reload csrr a0, vlenb slli a1, a0, 1 add a0, a1, a0 @@ -9441,11 +9437,8 @@ addi a0, a0, 544 vl1r.v v24, (a0) # Unknown-size Folded Reload csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 544 - vl1r.v v25, (a0) # Unknown-size Folded Reload - addi a0, sp, 544 vl2r.v v26, (a0) # Unknown-size Folded Reload .LBB25_142: # %for.inc87.i.i.i # in Loop: Header=BB25_143 Depth=3 @@ -9529,11 +9522,8 @@ addi a0, a0, 544 vl1r.v v24, (a0) # Unknown-size Folded Reload csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 544 - vl1r.v v25, (a0) # Unknown-size Folded Reload - addi a0, sp, 544 vl2r.v v26, (a0) # Unknown-size Folded Reload li s4, 1 j .LBB25_142 @@ -9583,11 +9573,8 @@ addi a1, a1, 544 vl1r.v v24, (a1) # Unknown-size Folded Reload csrr a1, vlenb - slli a1, a1, 1 add a1, sp, a1 addi a1, a1, 544 - vl1r.v v25, (a1) # Unknown-size Folded Reload - addi a1, sp, 544 vl2r.v v26, (a1) # Unknown-size Folded Reload bnez a0, .LBB25_161 # %bb.160: # %if.then55.i.i.i @@ -9750,11 +9737,8 @@ addi a0, a0, 544 vl1r.v v24, (a0) # Unknown-size Folded Reload csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 544 - vl1r.v v25, (a0) # Unknown-size Folded Reload - addi a0, sp, 544 vl2r.v v26, (a0) # Unknown-size Folded Reload ld a4, 128(sp) # 8-byte Folded Reload j .LBB25_116 @@ -9848,8 +9832,9 @@ sub t1, t1, t3 add t1, t2, t1 addi t1, t1, 74 - vmv1r.v v9, v24 - vmv1r.v v10, v24 + addi t2, sp, 544 + vl1r.v v10, (t2) # Unknown-size Folded Reload + vmv1r.v v9, v10 .LBB25_189: # %vector.body # Parent Loop BB25_117 Depth=1 # => This Inner Loop Header: Depth=2 @@ -9857,8 +9842,8 @@ vsetivli zero, 4, e16, mf2, ta, ma vle16.v v8, (t2) vle16.v v11, (t0) - vrgather.vv v12, v8, v25 - vrgather.vv v8, v11, v25 + vrgather.vv v12, v8, v24 + vrgather.vv v8, v11, v24 vsetvli zero, zero, e64, m2, ta, ma vsext.vf4 v14, v12 vsext.vf4 v12, v8 @@ -9879,13 +9864,11 @@ vor.vv v12, v18, v12 vmsne.vi v0, v14, 0 vmsne.vi v8, v12, 0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v11, v24, 1, v0 - vadd.vv v9, v9, v11 - vmv1r.v v0, v8 - vmerge.vim v8, v24, 1, v0 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v9, v9, 1, v0.t addi t0, t0, -16 - vadd.vv v10, v10, v8 + vmv1r.v v0, v8 + vadd.vi v10, v10, 1, v0.t bne t0, t1, .LBB25_189 # %bb.190: # %middle.block # in Loop: Header=BB25_117 Depth=1 @@ -9920,13 +9903,10 @@ addi a1, a1, %pcrel_lo(.Lpcrel_hi345) mv a4, s1 call fprintf - addi a0, sp, 544 - vl2r.v v26, (a0) # Unknown-size Folded Reload csrr a0, vlenb - slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 544 - vl1r.v v25, (a0) # Unknown-size Folded Reload + vl2r.v v26, (a0) # Unknown-size Folded Reload csrr a0, vlenb slli a1, a0, 1 add a0, a1, a0 --- build.a/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/lac/chunk_sparse_matrix.s 2024-04-01 12:40:58.214482055 +0000 +++ build.b/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/lac/chunk_sparse_matrix.s 2024-04-01 12:41:10.250146404 +0000 @@ -1271,21 +1271,19 @@ fmv.d.x fa5, zero mv a7, a5 mv t0, a1 - vmv.v.i v9, 0 .LBB24_5: # %vector.body # =>This Inner Loop Header: Depth=1 vl2re64.v v10, (t0) vsetvli zero, zero, e64, m2, ta, ma vmfne.vf v0, v10, fa5 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v10, v8, 1, v0 - vadd.vv v9, v9, v10 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a7, a7, a0 add t0, t0, a6 bnez a7, .LBB24_5 # %bb.6: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v9, v8 + vmv.s.x v9, zero + vredsum.vs v8, v8, v9 vmv.x.s a0, v8 beq a4, a5, .LBB24_9 .LBB24_7: # %for.body.i.i.preheader3 @@ -6390,19 +6388,18 @@ fmv.w.x fa5, zero mv a7, a5 mv t0, a1 - vmv.v.i v10, 0 .LBB80_5: # %vector.body # =>This Inner Loop Header: Depth=1 - vl2re32.v v12, (t0) - vmfne.vf v0, v12, fa5 - vmerge.vim v12, v8, 1, v0 - vadd.vv v10, v10, v12 + vl2re32.v v10, (t0) + vsetvli zero, zero, e32, m2, ta, mu + vmfne.vf v0, v10, fa5 + vadd.vi v8, v8, 1, v0.t sub a7, a7, a0 add t0, t0, a6 bnez a7, .LBB80_5 # %bb.6: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vmv.s.x v10, zero + vredsum.vs v8, v8, v10 vmv.x.s a0, v8 beq a4, a5, .LBB80_9 .LBB80_7: # %for.body.i.i.preheader3 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-loop-distribution.s 2024-04-01 12:41:00.518417802 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/tree-loop-distribution.s 2024-04-01 12:41:12.578081483 +0000 @@ -220,6 +220,8 @@ sd a1, 24(sp) # 8-byte Folded Spill vsetvli a1, zero, e32, m1, ta, ma vmv.v.i v8, 0 + addi a1, sp, 336 + vs1r.v v8, (a1) # Unknown-size Folded Spill lui a1, 16 addiw a1, a1, -1 sd a1, 120(sp) # 8-byte Folded Spill @@ -227,12 +229,10 @@ addiw a1, a1, -993 sd a1, 152(sp) # 8-byte Folded Spill vsetivli zero, 2, e64, m1, ta, ma - vmv.v.i v9, 0 + vmv.v.i v8, 0 csrr a1, vlenb add a1, sp, a1 addi a1, a1, 336 - vs1r.v v9, (a1) # Unknown-size Folded Spill - addi a1, sp, 336 vs1r.v v8, (a1) # Unknown-size Folded Spill sd a0, 96(sp) # 8-byte Folded Spill .LBB2_17: # %for.body @@ -708,8 +708,6 @@ # in Loop: Header=BB2_17 Depth=1 ld a2, 8(s0) addi a2, a2, 24 - addi a0, sp, 336 - vl1r.v v12, (a0) # Unknown-size Folded Reload li a6, 32 li a7, 9 bgeu s11, a1, .LBB2_86 @@ -726,10 +724,11 @@ .LBB2_79: # %vector.ph # in Loop: Header=BB2_17 Depth=1 sub a3, a1, a0 - vsetvli a0, zero, e32, m1, ta, ma + vsetvli a0, zero, e32, m1, ta, mu mv a0, a2 mv a4, a3 - vmv1r.v v8, v12 + addi t0, sp, 336 + vl1r.v v8, (t0) # Unknown-size Folded Reload .LBB2_80: # %vector.body # Parent Loop BB2_17 Depth=1 # => This Inner Loop Header: Depth=2 @@ -737,15 +736,13 @@ vsetvli zero, zero, e8, mf4, ta, ma vluxei64.v v9, (a5), v10 vmsne.vi v0, v9, 0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v9, v12, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t vsetvli zero, zero, e8, mf4, ta, ma vluxei64.v v9, (a7), v10 vmsne.vi v0, v9, 0 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v9, v12, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a4, a4, s11 add a0, a0, s9 bnez a4, .LBB2_80 --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/reload.s 2024-04-01 12:40:59.622442789 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/reload.s 2024-04-01 12:41:11.662107027 +0000 @@ -9405,8 +9405,6 @@ vsetvli a0, zero, e32, m2, ta, ma vmv.v.i v8, 0 csrr a0, vlenb - slli a2, a0, 1 - add a0, a2, a0 add a0, sp, a0 lui a2, 1 addiw a2, a2, 16 @@ -9415,6 +9413,8 @@ vsetvli zero, zero, e8, mf2, ta, ma vmv.v.i v8, 0 csrr a0, vlenb + slli a2, a0, 1 + add a0, a2, a0 add a0, sp, a0 lui a2, 1 addiw a2, a2, 16 @@ -9654,18 +9654,18 @@ # Parent Loop BB23_104 Depth=2 # Parent Loop BB23_109 Depth=3 # => This Inner Loop Header: Depth=4 - ld a4, 8(s7) - lwu a2, 0(a4) - and a1, a2, s4 + ld a2, 8(s7) + lwu a4, 0(a2) + and a1, a4, s4 bne a1, s5, .LBB23_121 # %bb.117: # %land.lhs.true655 # in Loop: Header=BB23_116 Depth=4 - lw a0, 8(a4) + lw a0, 8(a2) li a1, 37 bltu s10, a0, .LBB23_121 # %bb.118: # %if.then661 # in Loop: Header=BB23_116 Depth=4 - slli a1, a2, 40 + slli a1, a4, 40 lw a2, 16(s7) srli a1, a1, 56 slli a3, a3, 40 @@ -9691,8 +9691,8 @@ and a1, a3, s4 j .LBB23_122 .LBB23_121: # in Loop: Header=BB23_116 Depth=4 - mv a3, a2 - mv s7, a4 + mv a3, a4 + mv s7, a2 .LBB23_122: # %if.end695 # in Loop: Header=BB23_116 Depth=4 slli a0, a1, 2 @@ -9935,14 +9935,6 @@ add a0, a1, a0 lbu a0, 0(a0) mv a6, s2 - csrr a1, vlenb - slli a2, a1, 1 - add a1, a2, a1 - add a1, sp, a1 - lui a2, 1 - addiw a2, a2, 16 - add a1, a1, a2 - vl2r.v v8, (a1) # Unknown-size Folded Reload beqz a0, .LBB23_172 .LBB23_154: # %if.then853 # in Loop: Header=BB23_148 Depth=4 @@ -10063,14 +10055,6 @@ sub a0, a0, s10 .LBB23_171: # %cond.true813 # in Loop: Header=BB23_148 Depth=4 - csrr a1, vlenb - slli a2, a1, 1 - add a1, a2, a1 - add a1, sp, a1 - lui a2, 1 - addiw a2, a2, 16 - add a1, a1, a2 - vl2r.v v8, (a1) # Unknown-size Folded Reload sext.w a0, a0 li a1, 30 mul a0, a0, a1 @@ -10153,14 +10137,6 @@ snez a0, a0 subw a0, s4, a0 sd a0, 608(sp) # 8-byte Folded Spill - csrr a0, vlenb - slli a1, a0, 1 - add a0, a1, a0 - add a0, sp, a0 - lui a1, 1 - addiw a1, a1, 16 - add a0, a0, a1 - vl2r.v v8, (a0) # Unknown-size Folded Reload ld s4, 592(sp) # 8-byte Folded Reload ld a2, 632(sp) # 8-byte Folded Reload sw s11, 0(s5) @@ -10191,6 +10167,12 @@ addi a1, a1, 705 vsetvli a2, zero, e32, m2, ta, ma mv a2, a0 + csrr a3, vlenb + add a3, sp, a3 + lui a4, 1 + addiw a4, a4, 16 + add a3, a3, a4 + vl2r.v v8, (a3) # Unknown-size Folded Reload ld a3, 376(sp) # 8-byte Folded Reload .LBB23_185: # %vector.body2208 # Parent Loop BB23_101 Depth=1 @@ -11321,13 +11303,7 @@ lui a2, 1 addiw a2, a2, 16 add a1, a1, a2 - vl2r.v v16, (a1) # Unknown-size Folded Reload - csrr a1, vlenb - add a1, sp, a1 - lui a2, 1 - addiw a2, a2, 16 - add a1, a1, a2 - vl1r.v v8, (a1) # Unknown-size Folded Reload + vl1r.v v16, (a1) # Unknown-size Folded Reload beqz a0, .LBB23_349 .LBB23_361: # %if.then1768 # in Loop: Header=BB23_351 Depth=4 @@ -11413,13 +11389,7 @@ lui a1, 1 addiw a1, a1, 16 add a0, a0, a1 - vl2r.v v16, (a0) # Unknown-size Folded Reload - csrr a0, vlenb - add a0, sp, a0 - lui a1, 1 - addiw a1, a1, 16 - add a0, a0, a1 - vl1r.v v8, (a0) # Unknown-size Folded Reload + vl1r.v v16, (a0) # Unknown-size Folded Reload ld a0, 632(sp) # 8-byte Folded Reload bge s10, a0, .LBB23_382 # %bb.372: # %if.end72.i @@ -11453,13 +11423,7 @@ lui a3, 1 addiw a3, a3, 16 add a2, a2, a3 - vl2r.v v16, (a2) # Unknown-size Folded Reload - csrr a2, vlenb - add a2, sp, a2 - lui a3, 1 - addiw a3, a3, 16 - add a2, a2, a3 - vl1r.v v8, (a2) # Unknown-size Folded Reload + vl1r.v v16, (a2) # Unknown-size Folded Reload ld s10, 448(sp) # 8-byte Folded Reload beq a0, a1, .LBB23_349 # %bb.375: # %land.lhs.true38.i @@ -11488,13 +11452,7 @@ lui a3, 1 addiw a3, a3, 16 add a2, a2, a3 - vl2r.v v16, (a2) # Unknown-size Folded Reload - csrr a2, vlenb - add a2, sp, a2 - lui a3, 1 - addiw a3, a3, 16 - add a2, a2, a3 - vl1r.v v8, (a2) # Unknown-size Folded Reload + vl1r.v v16, (a2) # Unknown-size Folded Reload ld s10, 448(sp) # 8-byte Folded Reload bne a0, a1, .LBB23_361 # %bb.379: # %land.lhs.true58.i @@ -11551,8 +11509,12 @@ add a1, sp, a1 vl1r.v v10, (a1) # Unknown-size Folded Reload vmv.s.x v10, a3 - vmv1r.v v18, v8 - vmv2r.v v8, v16 + csrr a1, vlenb + add a1, sp, a1 + lui a2, 1 + addiw a2, a2, 16 + add a1, a1, a2 + vl2r.v v8, (a1) # Unknown-size Folded Reload and a0, a0, s10 vmv1r.v v8, v10 addi a1, sp, 2047 @@ -11576,11 +11538,10 @@ vsetvli zero, zero, e8, mf2, ta, ma vmsne.vi v10, v10, 0 vmand.mm v0, v0, v10 - vse8.v v18, (a3), v0.t - vse8.v v18, (a2), v0.t - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v10, v16, 1, v0 - vadd.vv v8, v8, v10 + vse8.v v16, (a3), v0.t + vse8.v v16, (a2), v0.t + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t add a1, a1, a7 add a2, a2, a6 sub a4, a4, a6 @@ -14500,18 +14461,18 @@ ret .LBB23_803: # %for.body45.lr.ph.i li s5, 0 - li s2, 0 + li s9, 0 addi s7, s3, 48 - li s9, 9 + li s10, 9 .Lpcrel_hi314: auipc a0, %got_pcrel_hi(mode_class) - ld s10, %pcrel_lo(.Lpcrel_hi314)(a0) + ld s11, %pcrel_lo(.Lpcrel_hi314)(a0) .Lpcrel_hi315: auipc a0, %pcrel_hi(secondary_memlocs_elim) addi s6, a0, %pcrel_lo(.Lpcrel_hi315) .Lpcrel_hi316: auipc a0, %got_pcrel_hi(mode_size) - ld s11, %pcrel_lo(.Lpcrel_hi316)(a0) + ld s2, %pcrel_lo(.Lpcrel_hi316)(a0) li s8, 608 j .LBB23_806 .LBB23_804: # %land.lhs.true335.i @@ -14526,10 +14487,10 @@ .LBB23_805: # %for.inc477.i # in Loop: Header=BB23_806 Depth=1 lw a0, %pcrel_lo(.Lpcrel_hi225)(a6) - addi s2, s2, 1 + addi s9, s9, 1 addi s5, s5, 1 addi s7, s7, 104 - bge s2, a0, .LBB23_839 + bge s9, a0, .LBB23_839 .LBB23_806: # %for.body45.i # =>This Inner Loop Header: Depth=1 ld a0, -48(s7) @@ -14542,7 +14503,7 @@ # %bb.808: # %land.lhs.true65.i # in Loop: Header=BB23_806 Depth=1 lw a0, 44(s7) - bltu s9, a0, .LBB23_810 + bltu s10, a0, .LBB23_810 # %bb.809: # %land.lhs.true65.i # in Loop: Header=BB23_806 Depth=1 srl a0, s8, a0 @@ -14562,7 +14523,7 @@ beq a1, a2, .LBB23_813 # %bb.812: # %cond.false105.i # in Loop: Header=BB23_806 Depth=1 - add a1, s11, a1 + add a1, s2, a1 lbu a0, 0(a1) .LBB23_813: # %cond.end.i1247 # in Loop: Header=BB23_806 Depth=1 @@ -14571,7 +14532,7 @@ j .LBB23_815 .LBB23_814: # %cond.true.i1249 # in Loop: Header=BB23_806 Depth=1 - add a1, s10, a1 + add a1, s11, a1 lbu a0, 0(a1) andi a0, a0, 254 addi a0, a0, -10 @@ -14592,7 +14553,7 @@ beq a1, a2, .LBB23_818 # %bb.817: # %cond.false147.i # in Loop: Header=BB23_806 Depth=1 - add a0, s11, a1 + add a0, s2, a1 lbu a0, 0(a0) .LBB23_818: # %cond.end155.i # in Loop: Header=BB23_806 Depth=1 @@ -14601,7 +14562,7 @@ j .LBB23_820 .LBB23_819: # %cond.true121.i # in Loop: Header=BB23_806 Depth=1 - add a0, s10, a1 + add a0, s11, a1 lbu a0, 0(a0) andi a0, a0, 254 addi a0, a0, -10 --- build.a/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/grid/tria.s 2024-04-01 12:40:58.174483170 +0000 +++ build.b/External/SPEC/CFP2017rate/510.parest_r/CMakeFiles/510.parest_r.dir/root/cpu2017/benchspec/CPU/510.parest_r/src/source/grid/tria.s 2024-04-01 12:41:10.214147408 +0000 @@ -19235,10 +19235,10 @@ vluxei64.v v10, (zero), v10 vand.vx v12, v12, t1 vsrl.vv v10, v10, v12 - vsetvli zero, zero, e32, m1, ta, ma - vnsrl.wi v12, v10, 0 - vand.vi v10, v12, 1 - vadd.vv v8, v8, v10 + vand.vi v10, v10, 1 + vmsne.vi v0, v10, 0 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t subw a7, a7, t0 vadd.vx v9, v9, t0 bnez a7, .LBB37_401 @@ -24053,62 +24053,61 @@ neg t0, t1 and t0, a0, t0 vsetvli t2, zero, e32, m1, ta, ma - vid.v v9 - vmv.v.i v10, 0 + vid.v v10 + vmv.v.i v9, 0 li t2, 63 mv t3, t0 - vmv.v.i v11, 0 .LBB46_6: # %vector.body # =>This Inner Loop Header: Depth=1 - vsrl.vi v8, v9, 1 + vsetvli zero, zero, e32, m1, ta, ma + vsrl.vi v8, v10, 1 vadd.vx v8, v8, a4 vsetvli zero, zero, e64, m2, ta, ma vzext.vf2 v12, v8 vsll.vi v12, v12, 2 vsetvli zero, zero, e32, m1, ta, ma vluxei64.v v8, (a5), v12 - vand.vi v12, v9, 1 - vadd.vv v12, v8, v12 - vsll.vi v8, v12, 2 + vand.vi v11, v10, 1 + vadd.vv v11, v8, v11 + vsll.vi v8, v11, 2 vsetvli zero, zero, e64, m2, ta, ma - vzext.vf2 v14, v8 - vsll.vi v14, v14, 2 + vzext.vf2 v12, v8 + vsll.vi v12, v12, 2 vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v8, (a6), v14 + vluxei64.v v8, (a6), v12 vmseq.vi v8, v8, -1 vsetvli zero, zero, e64, m2, ta, ma - vsext.vf2 v14, v12 + vsext.vf2 v12, v11 vmv1r.v v0, v8 - vlse64.v v16, (a7), zero, v0.t + vlse64.v v14, (a7), zero, v0.t vsetvli zero, zero, e32, m1, ta, ma - vsra.vi v13, v12, 31 - vsrl.vi v13, v13, 26 - vadd.vv v12, v12, v13 - vsra.vi v12, v12, 6 + vsra.vi v16, v11, 31 + vsrl.vi v16, v16, 26 + vadd.vv v11, v11, v16 + vsra.vi v11, v11, 6 vsetvli zero, zero, e64, m2, ta, mu - vand.vx v18, v14, a2 - vmsgtu.vx v0, v18, a3 - vsext.vf2 v18, v12 - vsll.vi v12, v18, 3 - vadd.vv v12, v16, v12 - vadd.vi v12, v12, -8, v0.t - vand.vx v14, v14, t2 + vand.vx v16, v12, a2 + vmsgtu.vx v0, v16, a3 + vsext.vf2 v16, v11 + vsll.vi v16, v16, 3 + vadd.vv v14, v14, v16 + vadd.vi v14, v14, -8, v0.t + vand.vx v12, v12, t2 vsetvli zero, zero, e64, m2, ta, ma vmv1r.v v0, v8 - vluxei64.v v12, (zero), v12, v0.t - vsrl.vv v12, v12, v14 + vluxei64.v v14, (zero), v14, v0.t + vsrl.vv v12, v14, v12 vand.vi v12, v12, 1 - vmsne.vi v14, v12, 0 - vmand.mm v0, v8, v14 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v8, v10, 1, v0 - vadd.vv v11, v11, v8 + vmsne.vi v11, v12, 0 + vmand.mm v0, v8, v11 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v9, v9, 1, v0.t subw t3, t3, t1 - vadd.vx v9, v9, t1 + vadd.vx v10, v10, t1 bnez t3, .LBB46_6 # %bb.7: # %middle.block vmv.s.x v8, zero - vredsum.vs v8, v11, v8 + vredsum.vs v8, v9, v8 vmv.x.s t1, v8 bne a0, t0, .LBB46_12 .LBB46_8: # %for.cond.cleanup --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/cfgloop.s 2024-04-01 12:40:59.398449036 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/cfgloop.s 2024-04-01 12:41:11.426113609 +0000 @@ -4315,7 +4315,6 @@ li a5, 8 mv a6, a2 mv a7, a0 - vmv.v.i v9, 0 .LBB32_7: # %vector.body # =>This Inner Loop Header: Depth=1 vl2re64.v v10, (a7) @@ -4323,17 +4322,17 @@ vluxei64.v v10, (a5), v10 vmsne.vi v0, v10, 0 vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v12, (zero), v10, v0.t - vmsgtu.vi v10, v12, 1 - vmand.mm v0, v0, v10 - vmerge.vim v10, v8, 1, v0 - vadd.vv v9, v9, v10 + vluxei64.v v9, (zero), v10, v0.t + vmsgtu.vi v9, v9, 1 + vmand.mm v0, v0, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t sub a6, a6, a3 add a7, a7, a4 bnez a6, .LBB32_7 # %bb.8: # %middle.block - vmv.s.x v8, zero - vredsum.vs v8, v9, v8 + vmv.s.x v9, zero + vredsum.vs v8, v8, v9 vmv.x.s s0, v8 bne a2, a1, .LBB32_10 .LBB32_9: # %for.end --- build.a/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/haifa-sched.s 2024-04-01 12:40:59.486446582 +0000 +++ build.b/External/SPEC/CINT2017rate/502.gcc_r/CMakeFiles/502.gcc_r.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/haifa-sched.s 2024-04-01 12:41:11.518111043 +0000 @@ -2994,18 +2994,16 @@ sub a2, a2, a4 and a2, a2, s8 vsetvli a4, zero, e32, m2, ta, ma - vmv.v.i v10, 0 + vmv.v.i v8, 0 mv a4, a2 mv a5, a1 - vmv.v.i v8, 0 .LBB23_26: # %vector.body108 # =>This Inner Loop Header: Depth=1 - vle8.v v12, (a5) vsetvli zero, zero, e8, mf2, ta, ma - vmseq.vi v0, v12, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 - vadd.vv v8, v8, v12 + vle8.v v10, (a5) + vmseq.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t sub a4, a4, a3 add a5, a5, a3 bnez a4, .LBB23_26 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ira-emit.s 2024-04-01 12:41:00.434420144 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ira-emit.s 2024-04-01 12:41:12.490083938 +0000 @@ -2734,7 +2734,7 @@ auipc a1, %pcrel_hi(hard_regno_last_set_check) addi a1, a1, %pcrel_lo(.Lpcrel_hi80) sd a1, 64(sp) # 8-byte Folded Spill - beqz s0, .LBB5_34 + beqz s0, .LBB5_33 # %bb.1: # %for.body.preheader vsetvli a1, zero, e32, m1, ta, ma vmv.v.x v8, a0 @@ -2822,54 +2822,51 @@ j .LBB5_2 .LBB5_12: # %for.body24.preheader vsetvli zero, zero, e32, m1, ta, ma - vmv.v.i v12, 0 + vmv.v.i v8, 0 + addi a0, sp, 128 + vs1r.v v8, (a0) # Unknown-size Folded Spill li s9, 8 li s10, 4 mv s11, s0 - addi a0, sp, 128 - vs1r.v v12, (a0) # Unknown-size Folded Spill - j .LBB5_16 -.LBB5_13: # in Loop: Header=BB5_16 Depth=1 - addi a1, sp, 128 - vl1r.v v12, (a1) # Unknown-size Folded Reload -.LBB5_14: # %for.end89 - # in Loop: Header=BB5_16 Depth=1 + j .LBB5_15 +.LBB5_13: # %for.end89 + # in Loop: Header=BB5_15 Depth=1 sw a0, 28(s11) -.LBB5_15: # %for.inc91 - # in Loop: Header=BB5_16 Depth=1 +.LBB5_14: # %for.inc91 + # in Loop: Header=BB5_15 Depth=1 ld s11, 16(s11) - beqz s11, .LBB5_34 -.LBB5_16: # %for.body24 + beqz s11, .LBB5_33 +.LBB5_15: # %for.body24 # =>This Loop Header: Depth=1 - # Child Loop BB5_22 Depth 2 - # Child Loop BB5_32 Depth 2 - # Child Loop BB5_27 Depth 2 + # Child Loop BB5_21 Depth 2 + # Child Loop BB5_31 Depth 2 + # Child Loop BB5_26 Depth 2 ld s8, 0(s11) lw s6, 12(s8) - bltz s6, .LBB5_15 -# %bb.17: # %if.then30 - # in Loop: Header=BB5_16 Depth=1 + bltz s6, .LBB5_14 +# %bb.16: # %if.then30 + # in Loop: Header=BB5_15 Depth=1 lwu a0, 8(s8) li a1, 87 mul a1, s6, a1 add a0, s3, a0 add a0, a0, a1 lbu s7, 0(a0) - beqz s7, .LBB5_20 -# %bb.18: # %for.body40.lr.ph - # in Loop: Header=BB5_16 Depth=1 + beqz s7, .LBB5_19 +# %bb.17: # %for.body40.lr.ph + # in Loop: Header=BB5_15 Depth=1 lw a0, %pcrel_lo(.Lpcrel_hi78)(s4) addi a1, s8, 4 - bgeu s7, s2, .LBB5_21 -# %bb.19: # in Loop: Header=BB5_16 Depth=1 + bgeu s7, s2, .LBB5_20 +# %bb.18: # in Loop: Header=BB5_15 Depth=1 li a2, 0 li a3, 0 - j .LBB5_30 -.LBB5_20: # in Loop: Header=BB5_16 Depth=1 + j .LBB5_29 +.LBB5_19: # in Loop: Header=BB5_15 Depth=1 li a3, 0 - j .LBB5_24 -.LBB5_21: # %vector.ph190 - # in Loop: Header=BB5_16 Depth=1 + j .LBB5_23 +.LBB5_20: # %vector.ph190 + # in Loop: Header=BB5_15 Depth=1 ld a2, 112(sp) # 8-byte Folded Reload li a3, 254 mul a2, a2, a3 @@ -2880,43 +2877,44 @@ slli a4, s6, 2 ld a5, 64(sp) # 8-byte Folded Reload add a4, a5, a4 - vsetvli a5, zero, e32, m1, ta, ma + vsetvli a5, zero, e32, m1, ta, mu mv a5, a2 - vmv1r.v v8, v12 -.LBB5_22: # %vector.body195 - # Parent Loop BB5_16 Depth=1 + addi a6, sp, 128 + vl1r.v v8, (a6) # Unknown-size Folded Reload +.LBB5_21: # %vector.body195 + # Parent Loop BB5_15 Depth=1 # => This Inner Loop Header: Depth=2 vl1re32.v v9, (a4) vmseq.vx v0, v9, a0 - vle64.v v10, (a3), v0.t vsetvli zero, zero, e64, m2, ta, ma + vle64.v v10, (a3), v0.t vluxei64.v v10, (s9), v10, v0.t vsetvli zero, zero, e32, m1, ta, ma vluxei64.v v9, (s10), v10, v0.t vlse32.v v10, (a1), zero, v0.t vmsne.vv v9, v9, v10 vmand.mm v0, v0, v9 - vmerge.vim v9, v12, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t add a3, a3, s1 sub a5, a5, s2 add a4, a4, s5 - bnez a5, .LBB5_22 -# %bb.23: # %middle.block187 - # in Loop: Header=BB5_16 Depth=1 + bnez a5, .LBB5_21 +# %bb.22: # %middle.block187 + # in Loop: Header=BB5_15 Depth=1 vmv.s.x v9, zero vredsum.vs v8, v8, v9 vmv.x.s a3, v8 - bne a2, s7, .LBB5_30 -.LBB5_24: # %for.end58 - # in Loop: Header=BB5_16 Depth=1 + bne a2, s7, .LBB5_29 +.LBB5_23: # %for.end58 + # in Loop: Header=BB5_15 Depth=1 slli a0, a3, 3 call ira_allocate sd a0, 32(s11) li a0, 0 beqz s7, .LBB5_13 -# %bb.25: # %for.body63.lr.ph - # in Loop: Header=BB5_16 Depth=1 +# %bb.24: # %for.body63.lr.ph + # in Loop: Header=BB5_15 Depth=1 lw a1, %pcrel_lo(.Lpcrel_hi78)(s4) slli a2, s6, 3 ld a3, 72(sp) # 8-byte Folded Reload @@ -2927,36 +2925,34 @@ ld a5, 64(sp) # 8-byte Folded Reload add a3, a5, a3 add a4, a5, a4 - addi a5, sp, 128 - vl1r.v v12, (a5) # Unknown-size Folded Reload - j .LBB5_27 -.LBB5_26: # %for.inc87 - # in Loop: Header=BB5_27 Depth=2 + j .LBB5_26 +.LBB5_25: # %for.inc87 + # in Loop: Header=BB5_26 Depth=2 addi a3, a3, 4 addi a2, a2, 8 - beq a3, a4, .LBB5_14 -.LBB5_27: # %for.body63 - # Parent Loop BB5_16 Depth=1 + beq a3, a4, .LBB5_13 +.LBB5_26: # %for.body63 + # Parent Loop BB5_15 Depth=1 # => This Inner Loop Header: Depth=2 lw a5, 0(a3) - bne a5, a1, .LBB5_26 -# %bb.28: # %land.lhs.true69 - # in Loop: Header=BB5_27 Depth=2 + bne a5, a1, .LBB5_25 +# %bb.27: # %land.lhs.true69 + # in Loop: Header=BB5_26 Depth=2 ld a5, 0(a2) ld a6, 8(a5) lw a6, 4(a6) lw a7, 4(s8) - beq a6, a7, .LBB5_26 -# %bb.29: # %if.then78 - # in Loop: Header=BB5_27 Depth=2 + beq a6, a7, .LBB5_25 +# %bb.28: # %if.then78 + # in Loop: Header=BB5_26 Depth=2 ld a6, 32(s11) slli a7, a0, 3 addiw a0, a0, 1 add a6, a6, a7 sd a5, 0(a6) - j .LBB5_26 -.LBB5_30: # %for.body40.preheader - # in Loop: Header=BB5_16 Depth=1 + j .LBB5_25 +.LBB5_29: # %for.body40.preheader + # in Loop: Header=BB5_15 Depth=1 add a4, a2, s6 slli a2, a4, 3 ld a5, 72(sp) # 8-byte Folded Reload @@ -2967,19 +2963,19 @@ add a5, s7, s6 slli a5, a5, 2 add a5, a6, a5 - j .LBB5_32 -.LBB5_31: # %for.inc56 - # in Loop: Header=BB5_32 Depth=2 + j .LBB5_31 +.LBB5_30: # %for.inc56 + # in Loop: Header=BB5_31 Depth=2 addi a4, a4, 4 addi a2, a2, 8 - beq a4, a5, .LBB5_24 -.LBB5_32: # %for.body40 - # Parent Loop BB5_16 Depth=1 + beq a4, a5, .LBB5_23 +.LBB5_31: # %for.body40 + # Parent Loop BB5_15 Depth=1 # => This Inner Loop Header: Depth=2 lw a6, 0(a4) - bne a6, a0, .LBB5_31 -# %bb.33: # %land.lhs.true - # in Loop: Header=BB5_32 Depth=2 + bne a6, a0, .LBB5_30 +# %bb.32: # %land.lhs.true + # in Loop: Header=BB5_31 Depth=2 ld a6, 0(a2) ld a6, 8(a6) lw a6, 4(a6) @@ -2987,52 +2983,52 @@ xor a6, a6, a7 snez a6, a6 addw a3, a3, a6 - j .LBB5_31 -.LBB5_34: # %for.end93 + j .LBB5_30 +.LBB5_33: # %for.end93 .Lpcrel_hi82: auipc a0, %pcrel_hi(move_vec) sd a0, 112(sp) # 8-byte Folded Spill ld a0, %pcrel_lo(.Lpcrel_hi82)(a0) - beqz a0, .LBB5_36 -# %bb.35: # %if.then.i + beqz a0, .LBB5_35 +# %bb.34: # %if.then.i sw zero, 0(a0) -.LBB5_36: # %VEC_move_t_base_truncate.exit - beqz s0, .LBB5_38 -.LBB5_37: # %for.body97 +.LBB5_35: # %VEC_move_t_base_truncate.exit + beqz s0, .LBB5_37 +.LBB5_36: # %for.body97 # =>This Inner Loop Header: Depth=1 mv a0, s0 call traverse_moves ld s0, 16(s0) - bnez s0, .LBB5_37 -.LBB5_38: # %for.end100 + bnez s0, .LBB5_36 +.LBB5_37: # %for.end100 ld a0, 112(sp) # 8-byte Folded Reload ld a0, %pcrel_lo(.Lpcrel_hi82)(a0) - beqz a0, .LBB5_44 -# %bb.39: # %VEC_move_t_base_length.exit + beqz a0, .LBB5_43 +# %bb.38: # %VEC_move_t_base_length.exit lw a1, 0(a0) - blez a1, .LBB5_44 -# %bb.40: # %for.body111.lr.ph + blez a1, .LBB5_43 +# %bb.39: # %for.body111.lr.ph li a2, 0 slli a1, a1, 3 add a1, a0, a1 - j .LBB5_42 -.LBB5_41: # %if.end124 - # in Loop: Header=BB5_42 Depth=1 + j .LBB5_41 +.LBB5_40: # %if.end124 + # in Loop: Header=BB5_41 Depth=1 addi a1, a1, -8 mv a2, a3 - beq a1, a0, .LBB5_45 -.LBB5_42: # %for.body111 + beq a1, a0, .LBB5_44 +.LBB5_41: # %for.body111 # =>This Inner Loop Header: Depth=1 ld a3, 0(a1) sd zero, 16(a3) - beqz a2, .LBB5_41 -# %bb.43: # %if.then122 - # in Loop: Header=BB5_42 Depth=1 + beqz a2, .LBB5_40 +# %bb.42: # %if.then122 + # in Loop: Header=BB5_41 Depth=1 sd a3, 16(a2) - j .LBB5_41 -.LBB5_44: + j .LBB5_40 +.LBB5_43: li a3, 0 -.LBB5_45: # %VEC_move_t_base_truncate.exit116 +.LBB5_44: # %VEC_move_t_base_truncate.exit116 sd a3, 16(sp) # 8-byte Folded Spill lw a1, 0(a0) addi a1, a1, -1 @@ -3045,8 +3041,8 @@ sw a2, %pcrel_lo(.Lpcrel_hi78)(s4) sw zero, 0(a0) sd a1, 24(sp) # 8-byte Folded Spill - beqz a1, .LBB5_72 -# %bb.46: # %for.body144.preheader + beqz a1, .LBB5_71 +# %bb.45: # %for.body144.preheader slli a7, s5, 1 srli s11, s5, 2 .Lpcrel_hi83: @@ -3076,23 +3072,23 @@ sd a0, 48(sp) # 8-byte Folded Spill ld s0, 24(sp) # 8-byte Folded Reload sd a7, 32(sp) # 8-byte Folded Spill - j .LBB5_48 -.LBB5_47: # %for.inc236 - # in Loop: Header=BB5_48 Depth=1 + j .LBB5_47 +.LBB5_46: # %for.inc236 + # in Loop: Header=BB5_47 Depth=1 ld s0, 16(s0) - beqz s0, .LBB5_72 -.LBB5_48: # %for.body144 + beqz s0, .LBB5_71 +.LBB5_47: # %for.body144 # =>This Loop Header: Depth=1 - # Child Loop BB5_52 Depth 2 - # Child Loop BB5_68 Depth 2 - # Child Loop BB5_71 Depth 2 + # Child Loop BB5_51 Depth 2 + # Child Loop BB5_67 Depth 2 + # Child Loop BB5_70 Depth 2 ld a1, 0(s0) lw a0, 12(a1) ld a2, 8(s0) sd a2, 56(sp) # 8-byte Folded Spill - bltz a0, .LBB5_63 -# %bb.49: # %if.then150 - # in Loop: Header=BB5_48 Depth=1 + bltz a0, .LBB5_62 +# %bb.48: # %if.then150 + # in Loop: Header=BB5_47 Depth=1 lwu a1, 8(a1) li a2, 87 mul a2, a0, a2 @@ -3100,9 +3096,9 @@ add a1, a3, a1 add a1, a1, a2 lbu a1, 0(a1) - beqz a1, .LBB5_63 -# %bb.50: # %for.body160.preheader - # in Loop: Header=BB5_48 Depth=1 + beqz a1, .LBB5_62 +# %bb.49: # %for.body160.preheader + # in Loop: Header=BB5_47 Depth=1 slli s9, a0, 3 ld a2, 72(sp) # 8-byte Folded Reload add s9, a2, s9 @@ -3112,26 +3108,26 @@ slli a1, a1, 2 add a0, a2, a0 add s8, a0, a1 - j .LBB5_52 -.LBB5_51: # %for.inc208 - # in Loop: Header=BB5_52 Depth=2 + j .LBB5_51 +.LBB5_50: # %for.inc208 + # in Loop: Header=BB5_51 Depth=2 addi s7, s7, 4 addi s9, s9, 8 - beq s7, s8, .LBB5_63 -.LBB5_52: # %for.body160 - # Parent Loop BB5_48 Depth=1 + beq s7, s8, .LBB5_62 +.LBB5_51: # %for.body160 + # Parent Loop BB5_47 Depth=1 # => This Inner Loop Header: Depth=2 lw a0, 0(s7) lw a1, %pcrel_lo(.Lpcrel_hi78)(s4) - bne a0, a1, .LBB5_51 -# %bb.53: # %land.lhs.true166 - # in Loop: Header=BB5_52 Depth=2 + bne a0, a1, .LBB5_50 +# %bb.52: # %land.lhs.true166 + # in Loop: Header=BB5_51 Depth=2 ld s1, 0(s9) ld a1, 8(s1) lw a0, 12(a1) - bltz a0, .LBB5_51 -# %bb.54: # %if.then174 - # in Loop: Header=BB5_52 Depth=2 + bltz a0, .LBB5_50 +# %bb.53: # %if.then174 + # in Loop: Header=BB5_51 Depth=2 lw a0, 4(a1) ld a2, 32(a1) li a1, 0 @@ -3175,20 +3171,20 @@ lw a1, 0(s6) sd a0, 24(s3) li a0, 4 - blt a1, a0, .LBB5_57 -# %bb.55: # %if.then174 - # in Loop: Header=BB5_52 Depth=2 + blt a1, a0, .LBB5_56 +# %bb.54: # %if.then174 + # in Loop: Header=BB5_51 Depth=2 ld a0, 104(sp) # 8-byte Folded Reload ld a0, 0(a0) - beqz a0, .LBB5_57 -# %bb.56: # %if.then.i118 - # in Loop: Header=BB5_52 Depth=2 + beqz a0, .LBB5_56 +# %bb.55: # %if.then.i118 + # in Loop: Header=BB5_51 Depth=2 lw a2, 8(s3) lw a3, 8(s10) ld a1, 48(sp) # 8-byte Folded Reload call fprintf -.LBB5_57: # %create_new_reg.exit - # in Loop: Header=BB5_52 Depth=2 +.LBB5_56: # %create_new_reg.exit + # in Loop: Header=BB5_51 Depth=2 lw a0, 0(s2) ld a1, 80(sp) # 8-byte Folded Reload lw a1, 0(a1) @@ -3211,21 +3207,21 @@ sd zero, 40(s3) sb zero, 24(s3) sd s2, 8(s1) - beqz a0, .LBB5_59 -# %bb.58: # %VEC_move_t_base_space.exit.i.i - # in Loop: Header=BB5_52 Depth=2 + beqz a0, .LBB5_58 +# %bb.57: # %VEC_move_t_base_space.exit.i.i + # in Loop: Header=BB5_51 Depth=2 lw a2, 4(a0) lw a1, 0(a0) - bne a2, a1, .LBB5_60 -.LBB5_59: # %if.then.i.i - # in Loop: Header=BB5_52 Depth=2 + bne a2, a1, .LBB5_59 +.LBB5_58: # %if.then.i.i + # in Loop: Header=BB5_51 Depth=2 li a1, 1 call vec_heap_p_reserve ld a1, 112(sp) # 8-byte Folded Reload sd a0, %pcrel_lo(.Lpcrel_hi82)(a1) lw a1, 0(a0) -.LBB5_60: # %VEC_move_t_heap_safe_push.exit - # in Loop: Header=BB5_52 Depth=2 +.LBB5_59: # %VEC_move_t_heap_safe_push.exit + # in Loop: Header=BB5_51 Depth=2 slli a2, a1, 32 addi a1, a1, 1 sw a1, 0(a0) @@ -3240,14 +3236,14 @@ addi a0, a0, 1 sw a0, 0(a1) li a0, 3 - blt a2, a0, .LBB5_51 -# %bb.61: # %VEC_move_t_heap_safe_push.exit - # in Loop: Header=BB5_52 Depth=2 + blt a2, a0, .LBB5_50 +# %bb.60: # %VEC_move_t_heap_safe_push.exit + # in Loop: Header=BB5_51 Depth=2 ld a0, 104(sp) # 8-byte Folded Reload ld a0, 0(a0) - beqz a0, .LBB5_51 -# %bb.62: # %if.then201 - # in Loop: Header=BB5_52 Depth=2 + beqz a0, .LBB5_50 +# %bb.61: # %if.then201 + # in Loop: Header=BB5_51 Depth=2 ld a1, 16(s2) lw a2, 0(s2) lw a3, 8(a1) @@ -3255,15 +3251,15 @@ auipc a1, %pcrel_hi(.L.str.9) addi a1, a1, %pcrel_lo(.Lpcrel_hi89) call fprintf - j .LBB5_51 -.LBB5_63: # %if.end211 - # in Loop: Header=BB5_48 Depth=1 + j .LBB5_50 +.LBB5_62: # %if.end211 + # in Loop: Header=BB5_47 Depth=1 ld a0, 56(sp) # 8-byte Folded Reload lw a1, 12(a0) ld a7, 32(sp) # 8-byte Folded Reload - bltz a1, .LBB5_47 -# %bb.64: # %if.end216 - # in Loop: Header=BB5_48 Depth=1 + bltz a1, .LBB5_46 +# %bb.63: # %if.end216 + # in Loop: Header=BB5_47 Depth=1 lwu a0, 8(a0) li a2, 87 mul a2, a1, a2 @@ -3271,16 +3267,16 @@ add a0, a3, a0 add a0, a0, a2 lbu a2, 0(a0) - beqz a2, .LBB5_47 -# %bb.65: # %for.body226.lr.ph - # in Loop: Header=BB5_48 Depth=1 + beqz a2, .LBB5_46 +# %bb.64: # %for.body226.lr.ph + # in Loop: Header=BB5_47 Depth=1 lw a0, %pcrel_lo(.Lpcrel_hi78)(s4) - bgeu a2, s11, .LBB5_67 -# %bb.66: # in Loop: Header=BB5_48 Depth=1 + bgeu a2, s11, .LBB5_66 +# %bb.65: # in Loop: Header=BB5_47 Depth=1 li a3, 0 - j .LBB5_70 -.LBB5_67: # %vector.ph207 - # in Loop: Header=BB5_48 Depth=1 + j .LBB5_69 +.LBB5_66: # %vector.ph207 + # in Loop: Header=BB5_47 Depth=1 srli a3, s5, 3 li a4, 254 mul a3, a3, a4 @@ -3296,20 +3292,20 @@ ld a6, 72(sp) # 8-byte Folded Reload add a5, a6, a5 mv a6, a3 -.LBB5_68: # %vector.body212 - # Parent Loop BB5_48 Depth=1 +.LBB5_67: # %vector.body212 + # Parent Loop BB5_47 Depth=1 # => This Inner Loop Header: Depth=2 vs2r.v v8, (a5) vs1r.v v10, (a4) add a4, a4, s5 sub a6, a6, s11 add a5, a5, a7 - bnez a6, .LBB5_68 -# %bb.69: # %middle.block204 - # in Loop: Header=BB5_48 Depth=1 - beq a3, a2, .LBB5_47 -.LBB5_70: # %for.body226.preheader - # in Loop: Header=BB5_48 Depth=1 + bnez a6, .LBB5_67 +# %bb.68: # %middle.block204 + # in Loop: Header=BB5_47 Depth=1 + beq a3, a2, .LBB5_46 +.LBB5_69: # %for.body226.preheader + # in Loop: Header=BB5_47 Depth=1 add a4, a3, a1 slli a3, a4, 2 ld a6, 64(sp) # 8-byte Folded Reload @@ -3320,35 +3316,35 @@ add a1, a2, a1 slli a1, a1, 2 add a1, a6, a1 -.LBB5_71: # %for.body226 - # Parent Loop BB5_48 Depth=1 +.LBB5_70: # %for.body226 + # Parent Loop BB5_47 Depth=1 # => This Inner Loop Header: Depth=2 sd s0, 0(a4) sw a0, 0(a3) addi a3, a3, 4 addi a4, a4, 8 - bne a3, a1, .LBB5_71 - j .LBB5_47 -.LBB5_72: # %for.end238 + bne a3, a1, .LBB5_70 + j .LBB5_46 +.LBB5_71: # %for.end238 ld a0, 112(sp) # 8-byte Folded Reload ld a0, %pcrel_lo(.Lpcrel_hi82)(a0) ld a3, 16(sp) # 8-byte Folded Reload - beqz a0, .LBB5_76 -# %bb.73: # %VEC_move_t_base_length.exit123 + beqz a0, .LBB5_75 +# %bb.72: # %VEC_move_t_base_length.exit123 lw a1, 0(a0) - blez a1, .LBB5_76 -# %bb.74: # %for.body250.lr.ph + blez a1, .LBB5_75 +# %bb.73: # %for.body250.lr.ph slli a1, a1, 3 add a1, a0, a1 -.LBB5_75: # %for.body250 +.LBB5_74: # %for.body250 # =>This Inner Loop Header: Depth=1 ld a2, 0(a1) sd zero, 16(a2) addi a1, a1, -8 sd a2, 16(a3) mv a3, a2 - bne a1, a0, .LBB5_75 -.LBB5_76: # %cleanup + bne a1, a0, .LBB5_74 +.LBB5_75: # %cleanup ld a0, 24(sp) # 8-byte Folded Reload csrr a1, vlenb slli a1, a1, 1 --- build.a/External/SPEC/CFP2017rate/544.nab_r/CMakeFiles/544.nab_r.dir/root/cpu2017/benchspec/CPU/544.nab_r/src/regex-alpha/regcomp.s 2024-04-01 12:40:59.206454390 +0000 +++ build.b/External/SPEC/CFP2017rate/544.nab_r/CMakeFiles/544.nab_r.dir/root/cpu2017/benchspec/CPU/544.nab_r/src/regex-alpha/regcomp.s 2024-04-01 12:41:11.226119187 +0000 @@ -5231,22 +5231,20 @@ li a5, 0 srli a6, a6, 1 negw a1, a6 - vsetvli a7, zero, e32, m2, ta, ma - vmv.v.i v10, 0 and a1, a2, a1 + vsetvli a7, zero, e32, m2, ta, ma vmv.v.i v8, 0 .LBB5_229: # %vector.body356 # =>This Inner Loop Header: Depth=1 andi a7, a5, 252 add a7, a3, a7 - vle8.v v12, (a7) vsetvli zero, zero, e8, mf2, ta, ma - vand.vx v12, v12, a4 - vmsne.vi v0, v12, 0 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v12, v10, 1, v0 + vle8.v v10, (a7) + vand.vx v10, v10, a4 + vmsne.vi v0, v10, 0 + vsetvli zero, zero, e32, m2, ta, mu addw a5, a5, a6 - vadd.vv v8, v8, v12 + vadd.vi v8, v8, 1, v0.t bne a1, a5, .LBB5_229 # %bb.230: # %middle.block348 vmv.s.x v10, zero --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ipa-pure-const.s 2024-04-01 12:41:00.426420367 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/ipa-pure-const.s 2024-04-01 12:41:12.482084160 +0000 @@ -1042,39 +1042,37 @@ li t2, 32 li t3, 296 vsetvli zero, zero, e64, m2, ta, ma - vmv.v.i v12, 0 - vmv1r.v v10, v9 + vmv.v.i v10, 0 .LBB3_8: # %vector.body # =>This Inner Loop Header: Depth=1 and t4, a6, t0 slli t4, t4, 3 add t4, a7, t4 - vl2re64.v v14, (t4) + vl2re64.v v12, (t4) vsetvli zero, zero, e16, mf2, ta, ma - vluxei64.v v8, (t1), v14 + vluxei64.v v8, (t1), v12 vand.vx v8, v8, t2 vmsne.vi v8, v8, 0 vmv1r.v v0, v8 - vlse32.v v11, (a0), zero, v0.t + vlse32.v v14, (a0), zero, v0.t vsetvli zero, zero, e32, m1, ta, ma - vluxei64.v v16, (t3), v14, v0.t - vmsltu.vv v11, v16, v11 - vmand.mm v0, v8, v11 + vluxei64.v v15, (t3), v12, v0.t + vmsltu.vv v12, v15, v14 + vmand.mm v0, v8, v12 vsetvli zero, zero, e64, m2, ta, mu - vzext.vf2 v14, v16 - vsll.vi v14, v14, 3 - vmv2r.v v16, v12 - vluxei64.v v16, (a1), v14, v0.t - vmsne.vi v11, v16, 0 - vmand.mm v0, v8, v11 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v8, v9, 1, v0 + vzext.vf2 v12, v15 + vsll.vi v12, v12, 3 + vmv2r.v v14, v10 + vluxei64.v v14, (a1), v12, v0.t + vmsne.vi v12, v14, 0 + vmand.mm v0, v8, v12 + vsetvli zero, zero, e32, m1, ta, mu add a6, a6, a5 - vadd.vv v10, v10, v8 + vadd.vi v9, v9, 1, v0.t bne a3, a6, .LBB3_8 # %bb.9: # %middle.block vmv.s.x v8, zero - vredsum.vs v8, v10, v8 + vredsum.vs v8, v9, v8 vmv.x.s a5, v8 bne a3, a4, .LBB3_20 .LBB3_10: # %for.end.loopexit147 --- build.a/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/loop-invariant.s 2024-04-01 12:41:00.438420033 +0000 +++ build.b/External/SPEC/CINT2017speed/602.gcc_s/CMakeFiles/602.gcc_s.dir/root/cpu2017/benchspec/CPU/502.gcc_r/src/loop-invariant.s 2024-04-01 12:41:12.498083715 +0000 @@ -35,9 +35,10 @@ .cfi_offset s10, -96 .cfi_offset s11, -104 csrr a0, vlenb - slli a0, a0, 3 + li a1, 6 + mul a0, a0, a1 sub sp, sp, a0 - .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x05, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 720 + 8 * vlenb + .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x05, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 720 + 6 * vlenb .Lpcrel_hi0: auipc a0, %got_pcrel_hi(flag_ira_loop_pressure) ld a0, %pcrel_lo(.Lpcrel_hi0)(a0) @@ -161,15 +162,13 @@ li a0, 1 li a1, 184 csrr a2, vlenb - slli a3, a2, 2 - add a2, a3, a2 + slli a2, a2, 2 add a2, sp, a2 addi a2, a2, 608 vs1r.v v8, (a2) # Unknown-size Folded Spill call xcalloc csrr a1, vlenb - slli a2, a1, 2 - add a1, a2, a1 + slli a1, a1, 2 add a1, sp, a1 addi a1, a1, 608 vl1r.v v8, (a1) # Unknown-size Folded Reload @@ -273,8 +272,7 @@ vsetvli a1, zero, e32, m2, ta, ma vmv.v.i v8, 0 csrr a1, vlenb - slli a2, a1, 2 - add a1, a2, a1 + slli a1, a1, 2 add a1, sp, a1 addi a1, a1, 608 vs2r.v v8, (a1) # Unknown-size Folded Spill @@ -373,8 +371,7 @@ ld a0, 200(sp) # 8-byte Folded Reload lw a0, 0(a0) csrr a1, vlenb - slli a2, a1, 2 - add a1, a2, a1 + slli a1, a1, 2 add a1, sp, a1 addi a1, a1, 608 vl2r.v v16, (a1) # Unknown-size Folded Reload @@ -1885,15 +1882,14 @@ vsetvli zero, zero, e32, m1, ta, ma vmv.v.i v8, 0 vsetvli zero, zero, e32, m1, tu, ma - vmv.v.i v9, 0 - vmv.s.x v9, a2 + vmv.s.x v8, a2 addi a1, sp, 608 - vs1r.v v9, (a1) # Unknown-size Folded Spill + vs1r.v v8, (a1) # Unknown-size Folded Spill .Lpcrel_hi75: auipc a1, %got_pcrel_hi(ira_available_class_regs) ld a7, %pcrel_lo(.Lpcrel_hi75)(a1) vsetvli a1, zero, e32, m2, ta, ma - vmv.v.i v10, 0 + vmv.v.i v8, 0 sd s9, 200(sp) # 8-byte Folded Spill .LBB0_233: # %for.body # =>This Loop Header: Depth=1 @@ -1943,16 +1939,10 @@ sd s6, 56(sp) # 8-byte Folded Spill sd a0, 48(sp) # 8-byte Folded Spill csrr a0, vlenb - slli a1, a0, 2 - add a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 608 - vs2r.v v10, (a0) # Unknown-size Folded Spill - csrr a0, vlenb slli a0, a0, 2 add a0, sp, a0 addi a0, a0, 608 - vs1r.v v8, (a0) # Unknown-size Folded Spill + vs2r.v v8, (a0) # Unknown-size Folded Spill ld a0, 136(sp) # 8-byte Folded Reload sw s4, %pcrel_lo(.Lpcrel_hi73)(a0) li a1, 100 @@ -3381,8 +3371,7 @@ and a2, a2, a0 mv a4, a2 csrr a5, vlenb - slli t0, a5, 2 - add a5, t0, a5 + slli a5, a5, 2 add a5, sp, a5 addi a5, a5, 608 vl2r.v v16, (a5) # Unknown-size Folded Reload @@ -3442,11 +3431,6 @@ vl2r.v v10, (a7) # Unknown-size Folded Reload addi a7, sp, 608 vl1r.v v8, (a7) # Unknown-size Folded Reload - csrr a7, vlenb - slli a7, a7, 2 - add a7, sp, a7 - addi a7, a7, 608 - vl1r.v v16, (a7) # Unknown-size Folded Reload ld a7, 16(sp) # 8-byte Folded Reload .LBB0_433: # %vector.body504 # Parent Loop BB0_233 Depth=1 @@ -3459,9 +3443,8 @@ vluxei64.v v12, (zero), v12, v0.t vmsne.vi v9, v12, 0 vmand.mm v0, v0, v9 - vsetvli zero, zero, e32, m1, ta, ma - vmerge.vim v9, v16, 1, v0 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, m1, ta, mu + vadd.vi v8, v8, 1, v0.t vsetvli zero, zero, e64, m2, ta, ma vadd.vx v10, v10, a7 sub a5, a5, a7 @@ -3982,13 +3965,7 @@ slli a0, a0, 2 add a0, sp, a0 addi a0, a0, 608 - vl1r.v v8, (a0) # Unknown-size Folded Reload - csrr a0, vlenb - slli a1, a0, 2 - add a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 608 - vl2r.v v10, (a0) # Unknown-size Folded Reload + vl2r.v v8, (a0) # Unknown-size Folded Reload ld a0, 48(sp) # 8-byte Folded Reload ld a6, 32(sp) # 8-byte Folded Reload ld a7, 96(sp) # 8-byte Folded Reload @@ -4203,7 +4180,8 @@ sd zero, %pcrel_lo(.Lpcrel_hi70)(s10) sw zero, %pcrel_lo(.Lpcrel_hi71)(s11) csrr a0, vlenb - slli a0, a0, 3 + li a1, 6 + mul a0, a0, a1 add sp, sp, a0 ld ra, 712(sp) # 8-byte Folded Reload ld s0, 704(sp) # 8-byte Folded Reload --- build.a/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/savewolf.s 2024-04-01 12:41:03.022347971 +0000 +++ build.b/MultiSource/Benchmarks/Prolangs-C/TimberWolfMC/CMakeFiles/timberwolfmc.dir/savewolf.s 2024-04-01 12:41:15.110010875 +0000 @@ -182,10 +182,9 @@ .cfi_offset fs0, -112 .cfi_offset fs1, -120 csrr a1, vlenb - li a2, 6 - mul a1, a1, a2 + slli a1, a1, 2 sub sp, sp, a1 - .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x80, 0x02, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 256 + 6 * vlenb + .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x80, 0x02, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 256 + 4 * vlenb mv s9, a0 .Lpcrel_hi8: auipc a0, %pcrel_hi(.L.str.5) @@ -197,9 +196,8 @@ mv a0, s9 call __isoc99_fscanf li s8, 4 - beq a0, s8, .LBB1_1 - j .LBB1_202 -.LBB1_1: # %while.body.preheader + bne a0, s8, .LBB1_202 +# %bb.1: # %while.body.preheader csrr a0, vlenb srli s5, a0, 1 li a1, 10 @@ -214,32 +212,16 @@ .Lpcrel_hi11: auipc a0, %pcrel_hi(.LCPI1_1) fld fs1, %pcrel_lo(.Lpcrel_hi11)(a0) - vsetivli zero, 2, e32, mf2, ta, ma - vmv.v.i v8, 0 - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 128 - vs1r.v v8, (a0) # Unknown-size Folded Spill - vsetivli zero, 4, e32, m1, ta, ma - vmv.v.i v16, 0 li s6, 7 li s2, 44 li s10, 20 vsetvli a0, zero, e32, m2, ta, ma vmv.v.i v8, 0 csrr a0, vlenb - slli a1, a0, 1 - add a0, a1, a0 + slli a0, a0, 1 add a0, sp, a0 addi a0, a0, 128 vs2r.v v8, (a0) # Unknown-size Folded Spill - csrr a0, vlenb - slli a1, a0, 2 - add a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 128 - vs1r.v v16, (a0) # Unknown-size Folded Spill sd s9, 48(sp) # 8-byte Folded Spill sd s3, 40(sp) # 8-byte Folded Spill sd s4, 32(sp) # 8-byte Folded Spill @@ -259,11 +241,10 @@ mv a0, s9 call __isoc99_fscanf csrr a1, vlenb - slli a2, a1, 2 - add a1, a2, a1 + slli a1, a1, 1 add a1, sp, a1 addi a1, a1, 128 - vl1r.v v16, (a1) # Unknown-size Folded Reload + vl2r.v v8, (a1) # Unknown-size Folded Reload bne a0, s8, .LBB1_202 .LBB1_3: # %while.body # =>This Loop Header: Depth=1 @@ -309,11 +290,10 @@ sext.w a1, a0 addi a0, s1, 1 csrr a2, vlenb - slli a3, a2, 2 - add a2, a3, a2 + slli a2, a2, 1 add a2, sp, a2 addi a2, a2, 128 - vl1r.v v16, (a2) # Unknown-size Folded Reload + vl2r.v v8, (a2) # Unknown-size Folded Reload bge s1, a1, .LBB1_16 .LBB1_6: # %for.body # Parent Loop BB1_3 Depth=1 @@ -343,12 +323,6 @@ addi a5, a0, 20 vsetvli a6, zero, e32, m2, ta, ma mv a6, a4 - csrr a7, vlenb - slli t0, a7, 1 - add a7, t0, a7 - add a7, sp, a7 - addi a7, a7, 128 - vl2r.v v8, (a7) # Unknown-size Folded Reload .LBB1_10: # %vector.body # Parent Loop BB1_3 Depth=1 # Parent Loop BB1_6 Depth=2 @@ -443,7 +417,7 @@ addi a0, s8, 72 vsetivli zero, 2, e32, mf2, ta, ma li a1, 8 - vlse32.v v17, (a0), a1 + vlse32.v v16, (a0), a1 addi a0, s8, 76 vlse32.v v8, (a0), a1 ld s1, 96(s8) @@ -452,7 +426,7 @@ beqz a0, .LBB1_201 .LBB1_22: # %for.body74.preheader.split # in Loop: Header=BB1_3 Depth=1 - vsub.vv v8, v8, v17 + vsub.vv v8, v8, v16 vfwcvt.f.x.v v9, v8 vsetvli zero, zero, e64, m1, ta, ma vfmul.vf v8, v9, fa0 @@ -464,23 +438,17 @@ vsetvli zero, zero, e64, m1, ta, ma vfsub.vv v8, v8, v10 vmfge.vf v0, v8, fs1 - vsetvli zero, zero, e32, mf2, ta, ma - csrr a0, vlenb - slli a0, a0, 1 - add a0, sp, a0 - addi a0, a0, 128 - vl1r.v v8, (a0) # Unknown-size Folded Reload - vmerge.vim v8, v8, 1, v0 - vadd.vv v8, v8, v9 - vsrl.vi v9, v8, 31 - vadd.vv v8, v8, v9 + vsetvli zero, zero, e32, mf2, ta, mu + vadd.vi v9, v9, 1, v0.t + vsrl.vi v8, v9, 31 + vadd.vv v8, v9, v8 vsra.vi v8, v8, 1 vrsub.vi v8, v8, 0 vwaddu.vv v9, v8, v8 li a0, -1 vwmaccu.vx v9, a0, v8 - vwaddu.vv v10, v17, v17 - vwmaccu.vx v10, a0, v17 + vwaddu.vv v10, v16, v16 + vwmaccu.vx v10, a0, v16 mv a0, s8 .LBB1_23: # %for.body74 # Parent Loop BB1_3 Depth=1 @@ -501,11 +469,10 @@ vsetvli zero, zero, e64, m2, ta, ma vfsub.vv v12, v14, v12 vmfge.vf v0, v12, fs1 - vsetvli zero, zero, e32, m1, ta, ma + vsetvli zero, zero, e32, m1, ta, mu ld a0, 0(a0) - vmerge.vim v12, v16, 1, v0 vadd.vv v11, v9, v11 - vadd.vv v11, v11, v12 + vadd.vi v11, v11, 1, v0.t vse32.v v11, (a1) bnez a0, .LBB1_23 # %bb.24: # %for.cond145.preheader @@ -513,10 +480,10 @@ beqz s2, .LBB1_27 # %bb.25: # %for.body148.preheader # in Loop: Header=BB1_3 Depth=1 - vmv.x.s a0, v17 + vmv.x.s a0, v16 vmv.x.s a1, v8 vsetivli zero, 1, e32, mf2, ta, ma - vslidedown.vi v9, v17, 1 + vslidedown.vi v9, v16, 1 vmv.x.s a2, v9 vslidedown.vi v9, v8, 1 vmv.x.s a3, v9 @@ -556,9 +523,9 @@ # %bb.28: # %for.body189.preheader # in Loop: Header=BB1_3 Depth=1 vsetivli zero, 1, e32, mf2, ta, ma - vmv.x.s a1, v17 + vmv.x.s a1, v16 vmv.x.s a2, v8 - vslidedown.vi v9, v17, 1 + vslidedown.vi v9, v16, 1 vmv.x.s a3, v9 vslidedown.vi v8, v8, 1 vmv.x.s a4, v8 @@ -1649,7 +1616,7 @@ csrr a0, vlenb add a0, sp, a0 addi a0, a0, 128 - vs1r.v v17, (a0) # Unknown-size Folded Spill + vs1r.v v16, (a0) # Unknown-size Folded Spill addi a0, sp, 128 vs1r.v v8, (a0) # Unknown-size Folded Spill call sqrt @@ -1658,19 +1625,12 @@ csrr a0, vlenb add a0, sp, a0 addi a0, a0, 128 - vl1r.v v17, (a0) # Unknown-size Folded Reload - csrr a0, vlenb - slli a1, a0, 2 - add a0, a1, a0 - add a0, sp, a0 - addi a0, a0, 128 vl1r.v v16, (a0) # Unknown-size Folded Reload vsetivli zero, 2, e32, mf2, ta, ma j .LBB1_22 .LBB1_202: # %while.end csrr a0, vlenb - li a1, 6 - mul a0, a0, a1 + slli a0, a0, 2 add sp, sp, a0 ld ra, 248(sp) # 8-byte Folded Reload ld s0, 240(sp) # 8-byte Folded Reload --- build.a/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-20050224-1.dir/20050224-1.s 2024-04-01 12:41:03.262341278 +0000 +++ build.b/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-20050224-1.dir/20050224-1.s 2024-04-01 12:41:15.350004183 +0000 @@ -83,33 +83,30 @@ vadd.vx v12, v8, t3 slli t0, t0, 11 vsetvli zero, zero, e32, m2, ta, ma - vmv.v.i v18, 0 + vmv.v.i v10, 0 vsetvli zero, zero, e64, m4, ta, ma - vmv.v.x v24, a1 - vmv.v.x v28, a2 + vmv.v.x v20, a1 + vmv.v.x v24, a2 mv t2, t1 - vmv2r.v v20, v18 - vmv2r.v v16, v18 - vmv2r.v v10, v18 + vmv2r.v v18, v10 + vmv2r.v v16, v10 .LBB1_6: # %vector.body # =>This Inner Loop Header: Depth=1 - vmsltu.vx v0, v12, a1 + vmsleu.vv v0, v20, v12 vmsleu.vv v8, v24, v12 - vmsleu.vv v9, v28, v12 - vmsltu.vx v22, v12, a3 - vmand.mm v9, v9, v22 - vmandn.mm v22, v8, v9 - vmsltu.vx v23, v12, a4 + vmsltu.vx v9, v12, a3 vmand.mm v8, v8, v9 - vsetvli zero, zero, e32, m2, ta, ma - vmerge.vim v6, v18, 1, v0 - vmand.mm v0, v22, v23 - vadd.vv v10, v10, v6 - vmerge.vim v22, v18, 1, v0 - vadd.vv v16, v16, v22 + vmandn.mm v9, v0, v8 + vmsltu.vx v28, v12, a4 + vmand.mm v8, v0, v8 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v30, v16, 1 + vmand.mm v9, v9, v28 + vmerge.vvm v16, v30, v16, v0 + vmv1r.v v0, v9 + vadd.vi v18, v18, 1, v0.t vmv1r.v v0, v8 - vmerge.vim v8, v18, 1, v0 - vadd.vv v20, v20, v8 + vadd.vi v10, v10, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma sub t2, t2, a6 vadd.vx v12, v12, t0 @@ -117,11 +114,11 @@ # %bb.7: # %middle.block vmv.s.x v8, zero vsetvli zero, zero, e32, m2, ta, ma - vredsum.vs v9, v20, v8 + vredsum.vs v9, v10, v8 vmv.x.s a6, v9 - vredsum.vs v9, v16, v8 + vredsum.vs v9, v18, v8 vmv.x.s t0, v9 - vredsum.vs v8, v10, v8 + vredsum.vs v8, v16, v8 vmv.x.s t2, v8 bne a7, t1, .LBB1_12 .LBB1_8: # %for.end --- build.a/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-builtin-bitops-1.dir/builtin-bitops-1.s 2024-04-01 12:41:03.286340609 +0000 +++ build.b/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-builtin-bitops-1.dir/builtin-bitops-1.s 2024-04-01 12:41:15.378003402 +0000 @@ -863,10 +863,10 @@ .LBB10_3: # %vector.body # =>This Inner Loop Header: Depth=1 vsrl.vv v20, v16, v12 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v10, v20, 0 - vand.vi v10, v10, 1 - vadd.vv v8, v8, v10 + vand.vi v20, v20, 1 + vmsne.vi v0, v20, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma sub a3, a3, a1 vadd.vx v12, v12, a1 @@ -922,10 +922,10 @@ .LBB11_3: # %vector.body # =>This Inner Loop Header: Depth=1 vsrl.vv v20, v16, v12 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v10, v20, 0 - vand.vi v10, v10, 1 - vadd.vv v8, v8, v10 + vand.vi v20, v20, 1 + vmsne.vi v0, v20, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma sub a3, a3, a2 vadd.vx v12, v12, a2 @@ -1100,10 +1100,10 @@ .LBB16_3: # %vector.body # =>This Inner Loop Header: Depth=1 vsrl.vv v20, v16, v12 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v10, v20, 0 - vand.vi v10, v10, 1 - vadd.vv v8, v8, v10 + vand.vi v20, v20, 1 + vmsne.vi v0, v20, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma sub a3, a3, a1 vadd.vx v12, v12, a1 @@ -1159,10 +1159,10 @@ .LBB17_3: # %vector.body # =>This Inner Loop Header: Depth=1 vsrl.vv v20, v16, v12 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v10, v20, 0 - vand.vi v10, v10, 1 - vadd.vv v8, v8, v10 + vand.vi v20, v20, 1 + vmsne.vi v0, v20, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v8, v8, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma sub a3, a3, a2 vadd.vx v12, v12, a2 @@ -1652,10 +1652,10 @@ # Parent Loop BB18_12 Depth=1 # => This Inner Loop Header: Depth=2 vsrl.vv v24, v16, v20 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v28, v24, 0 - vand.vi v24, v28, 1 - vadd.vv v14, v14, v24 + vand.vi v24, v24, 1 + vmsne.vi v0, v24, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v14, v14, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma sub s4, s4, a2 vadd.vx v20, v20, a2 @@ -1708,10 +1708,10 @@ # Parent Loop BB18_12 Depth=1 # => This Inner Loop Header: Depth=2 vsrl.vv v24, v16, v20 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v28, v24, 0 - vand.vi v24, v28, 1 - vadd.vv v14, v14, v24 + vand.vi v24, v24, 1 + vmsne.vi v0, v24, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v14, v14, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma sub s5, s5, a2 vadd.vx v20, v20, a2 @@ -1950,10 +1950,10 @@ # Parent Loop BB18_50 Depth=1 # => This Inner Loop Header: Depth=2 vsrl.vv v24, v16, v20 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v28, v24, 0 - vand.vi v24, v28, 1 - vadd.vv v14, v14, v24 + vand.vi v24, v24, 1 + vmsne.vi v0, v24, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v14, v14, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma sub s4, s4, a2 vadd.vx v20, v20, a2 @@ -2006,10 +2006,10 @@ # Parent Loop BB18_50 Depth=1 # => This Inner Loop Header: Depth=2 vsrl.vv v24, v16, v20 - vsetvli zero, zero, e32, m2, ta, ma - vnsrl.wi v28, v24, 0 - vand.vi v24, v28, 1 - vadd.vv v14, v14, v24 + vand.vi v24, v24, 1 + vmsne.vi v0, v24, 0 + vsetvli zero, zero, e32, m2, ta, mu + vadd.vi v14, v14, 1, v0.t vsetvli zero, zero, e64, m4, ta, ma sub s5, s5, a2 vadd.vx v20, v20, a2