@@ -456,92 +456,80 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
456456define i64 @fcvt_l_bf16_sat (bfloat %a ) nounwind {
457457; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
458458; RV32IZFBFMIN: # %bb.0: # %start
459- ; RV32IZFBFMIN-NEXT: addi sp, sp, -32
460- ; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
461- ; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
462- ; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
463- ; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
464- ; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
465- ; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
466- ; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
467- ; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
459+ ; RV32IZFBFMIN-NEXT: addi sp, sp, -16
460+ ; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
461+ ; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
462+ ; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
468463; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
469- ; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0
470- ; RV32IZFBFMIN-NEXT: neg s1, s0
471464; RV32IZFBFMIN-NEXT: lui a0, 913408
472465; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0
473- ; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0
474- ; RV32IZFBFMIN-NEXT: neg s3, s2
466+ ; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0
475467; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
476468; RV32IZFBFMIN-NEXT: call __fixsfdi
477- ; RV32IZFBFMIN-NEXT: and a0, s3, a0
478- ; RV32IZFBFMIN-NEXT: or a0, s1, a0
479- ; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
480- ; RV32IZFBFMIN-NEXT: neg a2, a2
481469; RV32IZFBFMIN-NEXT: lui a4, 524288
482- ; RV32IZFBFMIN-NEXT: lui a3 , 524288
483- ; RV32IZFBFMIN-NEXT: beqz s2 , .LBB10_2
470+ ; RV32IZFBFMIN-NEXT: lui a2 , 524288
471+ ; RV32IZFBFMIN-NEXT: beqz s0 , .LBB10_2
484472; RV32IZFBFMIN-NEXT: # %bb.1: # %start
485- ; RV32IZFBFMIN-NEXT: mv a3 , a1
473+ ; RV32IZFBFMIN-NEXT: mv a2 , a1
486474; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
487- ; RV32IZFBFMIN-NEXT: and a0, a2, a0
488- ; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4
475+ ; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
476+ ; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
477+ ; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0
478+ ; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4
489479; RV32IZFBFMIN-NEXT: # %bb.3:
490- ; RV32IZFBFMIN-NEXT: addi a3 , a4, -1
480+ ; RV32IZFBFMIN-NEXT: addi a2 , a4, -1
491481; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
492- ; RV32IZFBFMIN-NEXT: and a1, a2, a3
493- ; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
494- ; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
495- ; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
496- ; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
497- ; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
498- ; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
499- ; RV32IZFBFMIN-NEXT: addi sp, sp, 32
482+ ; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0
483+ ; RV32IZFBFMIN-NEXT: neg a4, a1
484+ ; RV32IZFBFMIN-NEXT: and a1, a4, a2
485+ ; RV32IZFBFMIN-NEXT: neg a2, a3
486+ ; RV32IZFBFMIN-NEXT: neg a3, s0
487+ ; RV32IZFBFMIN-NEXT: and a0, a3, a0
488+ ; RV32IZFBFMIN-NEXT: or a0, a2, a0
489+ ; RV32IZFBFMIN-NEXT: and a0, a4, a0
490+ ; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
491+ ; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
492+ ; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
493+ ; RV32IZFBFMIN-NEXT: addi sp, sp, 16
500494; RV32IZFBFMIN-NEXT: ret
501495;
502496; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
503497; R32IDZFBFMIN: # %bb.0: # %start
504- ; R32IDZFBFMIN-NEXT: addi sp, sp, -32
505- ; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
506- ; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
507- ; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
508- ; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
509- ; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
498+ ; R32IDZFBFMIN-NEXT: addi sp, sp, -16
499+ ; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
500+ ; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
510501; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
511- ; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
512- ; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
513502; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
514- ; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0
515- ; R32IDZFBFMIN-NEXT: neg s1, s0
516503; R32IDZFBFMIN-NEXT: lui a0, 913408
517504; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0
518- ; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0
519- ; R32IDZFBFMIN-NEXT: neg s3, s2
505+ ; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0
520506; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
521507; R32IDZFBFMIN-NEXT: call __fixsfdi
522- ; R32IDZFBFMIN-NEXT: and a0, s3, a0
523- ; R32IDZFBFMIN-NEXT: or a0, s1, a0
524- ; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
525- ; R32IDZFBFMIN-NEXT: neg a2, a2
526508; R32IDZFBFMIN-NEXT: lui a4, 524288
527- ; R32IDZFBFMIN-NEXT: lui a3 , 524288
528- ; R32IDZFBFMIN-NEXT: beqz s2 , .LBB10_2
509+ ; R32IDZFBFMIN-NEXT: lui a2 , 524288
510+ ; R32IDZFBFMIN-NEXT: beqz s0 , .LBB10_2
529511; R32IDZFBFMIN-NEXT: # %bb.1: # %start
530- ; R32IDZFBFMIN-NEXT: mv a3 , a1
512+ ; R32IDZFBFMIN-NEXT: mv a2 , a1
531513; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
532- ; R32IDZFBFMIN-NEXT: and a0, a2, a0
533- ; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4
514+ ; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
515+ ; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
516+ ; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0
517+ ; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4
534518; R32IDZFBFMIN-NEXT: # %bb.3:
535- ; R32IDZFBFMIN-NEXT: addi a3 , a4, -1
519+ ; R32IDZFBFMIN-NEXT: addi a2 , a4, -1
536520; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
537- ; R32IDZFBFMIN-NEXT: and a1, a2, a3
538- ; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
539- ; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
540- ; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
541- ; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
542- ; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
521+ ; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0
522+ ; R32IDZFBFMIN-NEXT: neg a4, a1
523+ ; R32IDZFBFMIN-NEXT: and a1, a4, a2
524+ ; R32IDZFBFMIN-NEXT: neg a2, a3
525+ ; R32IDZFBFMIN-NEXT: neg a3, s0
526+ ; R32IDZFBFMIN-NEXT: and a0, a3, a0
527+ ; R32IDZFBFMIN-NEXT: or a0, a2, a0
528+ ; R32IDZFBFMIN-NEXT: and a0, a4, a0
529+ ; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
530+ ; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
543531; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
544- ; R32IDZFBFMIN-NEXT: addi sp, sp, 32
532+ ; R32IDZFBFMIN-NEXT: addi sp, sp, 16
545533; R32IDZFBFMIN-NEXT: ret
546534;
547535; RV32ID-LABEL: fcvt_l_bf16_sat:
0 commit comments