@@ -456,121 +456,142 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
456
456
define i64 @fcvt_l_bf16_sat (bfloat %a ) nounwind {
457
457
; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
458
458
; RV32IZFBFMIN: # %bb.0: # %start
459
- ; RV32IZFBFMIN-NEXT: addi sp, sp, -16
460
- ; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
461
- ; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
462
- ; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
459
+ ; RV32IZFBFMIN-NEXT: addi sp, sp, -32
460
+ ; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
461
+ ; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
462
+ ; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
463
+ ; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
464
+ ; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
465
+ ; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
466
+ ; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
467
+ ; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
463
468
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
469
+ ; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0
470
+ ; RV32IZFBFMIN-NEXT: neg s1, s0
464
471
; RV32IZFBFMIN-NEXT: lui a0, 913408
465
472
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0
466
- ; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0
473
+ ; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0
474
+ ; RV32IZFBFMIN-NEXT: neg s3, s2
467
475
; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
468
476
; RV32IZFBFMIN-NEXT: call __fixsfdi
477
+ ; RV32IZFBFMIN-NEXT: and a0, s3, a0
478
+ ; RV32IZFBFMIN-NEXT: or a0, s1, a0
479
+ ; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
480
+ ; RV32IZFBFMIN-NEXT: neg a2, a2
469
481
; RV32IZFBFMIN-NEXT: lui a4, 524288
470
- ; RV32IZFBFMIN-NEXT: lui a2, 524288
471
- ; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2
482
+ ; RV32IZFBFMIN-NEXT: li a5, 1
483
+ ; RV32IZFBFMIN-NEXT: lui a3, 524288
484
+ ; RV32IZFBFMIN-NEXT: bne s2, a5, .LBB10_2
472
485
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
473
- ; RV32IZFBFMIN-NEXT: mv a2, a1
486
+ ; RV32IZFBFMIN-NEXT: mv a3, a1
474
487
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
475
- ; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
476
- ; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
477
- ; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0
478
- ; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4
488
+ ; RV32IZFBFMIN-NEXT: and a0, a2, a0
489
+ ; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4
479
490
; RV32IZFBFMIN-NEXT: # %bb.3:
480
- ; RV32IZFBFMIN-NEXT: addi a2, a4, -1
491
+ ; RV32IZFBFMIN-NEXT: addi a3, a4, -1
481
492
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
482
- ; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0
483
- ; RV32IZFBFMIN-NEXT: neg a4, a1
484
- ; RV32IZFBFMIN-NEXT: and a1, a4, a2
485
- ; RV32IZFBFMIN-NEXT: neg a2, a3
486
- ; RV32IZFBFMIN-NEXT: neg a3, s0
487
- ; RV32IZFBFMIN-NEXT: and a0, a3, a0
488
- ; RV32IZFBFMIN-NEXT: or a0, a2, a0
489
- ; RV32IZFBFMIN-NEXT: and a0, a4, a0
490
- ; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
491
- ; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
492
- ; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
493
- ; RV32IZFBFMIN-NEXT: addi sp, sp, 16
493
+ ; RV32IZFBFMIN-NEXT: and a1, a2, a3
494
+ ; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
495
+ ; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
496
+ ; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
497
+ ; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
498
+ ; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
499
+ ; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
500
+ ; RV32IZFBFMIN-NEXT: addi sp, sp, 32
494
501
; RV32IZFBFMIN-NEXT: ret
495
502
;
496
503
; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
497
504
; R32IDZFBFMIN: # %bb.0: # %start
498
- ; R32IDZFBFMIN-NEXT: addi sp, sp, -16
499
- ; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
500
- ; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
505
+ ; R32IDZFBFMIN-NEXT: addi sp, sp, -32
506
+ ; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
507
+ ; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
508
+ ; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
509
+ ; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
510
+ ; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
501
511
; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
512
+ ; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
513
+ ; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
502
514
; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
515
+ ; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0
516
+ ; R32IDZFBFMIN-NEXT: neg s1, s0
503
517
; R32IDZFBFMIN-NEXT: lui a0, 913408
504
518
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0
505
- ; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0
519
+ ; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0
520
+ ; R32IDZFBFMIN-NEXT: neg s3, s2
506
521
; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
507
522
; R32IDZFBFMIN-NEXT: call __fixsfdi
523
+ ; R32IDZFBFMIN-NEXT: and a0, s3, a0
524
+ ; R32IDZFBFMIN-NEXT: or a0, s1, a0
525
+ ; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
526
+ ; R32IDZFBFMIN-NEXT: neg a2, a2
508
527
; R32IDZFBFMIN-NEXT: lui a4, 524288
509
- ; R32IDZFBFMIN-NEXT: lui a2, 524288
510
- ; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2
528
+ ; R32IDZFBFMIN-NEXT: li a5, 1
529
+ ; R32IDZFBFMIN-NEXT: lui a3, 524288
530
+ ; R32IDZFBFMIN-NEXT: bne s2, a5, .LBB10_2
511
531
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
512
- ; R32IDZFBFMIN-NEXT: mv a2, a1
532
+ ; R32IDZFBFMIN-NEXT: mv a3, a1
513
533
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
514
- ; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
515
- ; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
516
- ; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0
517
- ; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4
534
+ ; R32IDZFBFMIN-NEXT: and a0, a2, a0
535
+ ; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4
518
536
; R32IDZFBFMIN-NEXT: # %bb.3:
519
- ; R32IDZFBFMIN-NEXT: addi a2, a4, -1
537
+ ; R32IDZFBFMIN-NEXT: addi a3, a4, -1
520
538
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
521
- ; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0
522
- ; R32IDZFBFMIN-NEXT: neg a4, a1
523
- ; R32IDZFBFMIN-NEXT: and a1, a4, a2
524
- ; R32IDZFBFMIN-NEXT: neg a2, a3
525
- ; R32IDZFBFMIN-NEXT: neg a3, s0
526
- ; R32IDZFBFMIN-NEXT: and a0, a3, a0
527
- ; R32IDZFBFMIN-NEXT: or a0, a2, a0
528
- ; R32IDZFBFMIN-NEXT: and a0, a4, a0
529
- ; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
530
- ; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
539
+ ; R32IDZFBFMIN-NEXT: and a1, a2, a3
540
+ ; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
541
+ ; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
542
+ ; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
543
+ ; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
544
+ ; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
531
545
; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
532
- ; R32IDZFBFMIN-NEXT: addi sp, sp, 16
546
+ ; R32IDZFBFMIN-NEXT: addi sp, sp, 32
533
547
; R32IDZFBFMIN-NEXT: ret
534
548
;
535
549
; RV32ID-LABEL: fcvt_l_bf16_sat:
536
550
; RV32ID: # %bb.0: # %start
537
- ; RV32ID-NEXT: addi sp, sp, -16
538
- ; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
539
- ; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
551
+ ; RV32ID-NEXT: addi sp, sp, -32
552
+ ; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
553
+ ; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
554
+ ; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
555
+ ; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
556
+ ; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
540
557
; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
558
+ ; RV32ID-NEXT: lui a0, %hi(.LCPI10_0)
559
+ ; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
541
560
; RV32ID-NEXT: fmv.x.w a0, fa0
542
561
; RV32ID-NEXT: slli a0, a0, 16
543
562
; RV32ID-NEXT: fmv.w.x fs0, a0
563
+ ; RV32ID-NEXT: flt.s s0, fa5, fs0
564
+ ; RV32ID-NEXT: neg s1, s0
544
565
; RV32ID-NEXT: lui a0, 913408
545
566
; RV32ID-NEXT: fmv.w.x fa5, a0
546
- ; RV32ID-NEXT: fle.s s0, fa5, fs0
567
+ ; RV32ID-NEXT: fle.s s2, fa5, fs0
568
+ ; RV32ID-NEXT: neg s3, s2
547
569
; RV32ID-NEXT: fmv.s fa0, fs0
548
570
; RV32ID-NEXT: call __fixsfdi
571
+ ; RV32ID-NEXT: and a0, s3, a0
572
+ ; RV32ID-NEXT: or a0, s1, a0
573
+ ; RV32ID-NEXT: feq.s a2, fs0, fs0
574
+ ; RV32ID-NEXT: neg a2, a2
549
575
; RV32ID-NEXT: lui a4, 524288
550
- ; RV32ID-NEXT: lui a2, 524288
551
- ; RV32ID-NEXT: beqz s0, .LBB10_2
576
+ ; RV32ID-NEXT: li a5, 1
577
+ ; RV32ID-NEXT: lui a3, 524288
578
+ ; RV32ID-NEXT: bne s2, a5, .LBB10_2
552
579
; RV32ID-NEXT: # %bb.1: # %start
553
- ; RV32ID-NEXT: mv a2, a1
580
+ ; RV32ID-NEXT: mv a3, a1
554
581
; RV32ID-NEXT: .LBB10_2: # %start
555
- ; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
556
- ; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
557
- ; RV32ID-NEXT: flt.s a3, fa5, fs0
558
- ; RV32ID-NEXT: beqz a3, .LBB10_4
582
+ ; RV32ID-NEXT: and a0, a2, a0
583
+ ; RV32ID-NEXT: beqz s0, .LBB10_4
559
584
; RV32ID-NEXT: # %bb.3:
560
- ; RV32ID-NEXT: addi a2, a4, -1
585
+ ; RV32ID-NEXT: addi a3, a4, -1
561
586
; RV32ID-NEXT: .LBB10_4: # %start
562
- ; RV32ID-NEXT: feq.s a1, fs0, fs0
563
- ; RV32ID-NEXT: neg a4, a1
564
- ; RV32ID-NEXT: and a1, a4, a2
565
- ; RV32ID-NEXT: neg a2, a3
566
- ; RV32ID-NEXT: neg a3, s0
567
- ; RV32ID-NEXT: and a0, a3, a0
568
- ; RV32ID-NEXT: or a0, a2, a0
569
- ; RV32ID-NEXT: and a0, a4, a0
570
- ; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
571
- ; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
587
+ ; RV32ID-NEXT: and a1, a2, a3
588
+ ; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
589
+ ; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
590
+ ; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
591
+ ; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
592
+ ; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
572
593
; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
573
- ; RV32ID-NEXT: addi sp, sp, 16
594
+ ; RV32ID-NEXT: addi sp, sp, 32
574
595
; RV32ID-NEXT: ret
575
596
;
576
597
; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat:
@@ -654,7 +675,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
654
675
; CHECK32ZFBFMIN-NEXT: neg s0, a0
655
676
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa5, zero
656
677
; CHECK32ZFBFMIN-NEXT: fle.s a0, fa5, fa0
657
- ; CHECK32ZFBFMIN-NEXT: neg s1, a0
678
+ ; CHECK32ZFBFMIN-NEXT: xori a0, a0, 1
679
+ ; CHECK32ZFBFMIN-NEXT: addi s1, a0, -1
658
680
; CHECK32ZFBFMIN-NEXT: call __fixunssfdi
659
681
; CHECK32ZFBFMIN-NEXT: and a0, s1, a0
660
682
; CHECK32ZFBFMIN-NEXT: or a0, s0, a0
@@ -681,7 +703,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
681
703
; RV32ID-NEXT: neg s0, a0
682
704
; RV32ID-NEXT: fmv.w.x fa5, zero
683
705
; RV32ID-NEXT: fle.s a0, fa5, fa0
684
- ; RV32ID-NEXT: neg s1, a0
706
+ ; RV32ID-NEXT: xori a0, a0, 1
707
+ ; RV32ID-NEXT: addi s1, a0, -1
685
708
; RV32ID-NEXT: call __fixunssfdi
686
709
; RV32ID-NEXT: and a0, s1, a0
687
710
; RV32ID-NEXT: or a0, s0, a0
0 commit comments