Skip to content

Commit fc7fcdb

Browse files
committed
[RISCV] Insert a freeze before converting select to AND/OR.
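A select blocks poison coming from the operand it does not select, but AND/OR propagate poison from either operand. When converting a select to AND/OR we therefore need to insert a freeze to block poison propagation. This creates suboptimal codegen, which I will try to fix with other patches; we should prioritize the correctness fix. Fixes llvm#84200.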
1 parent 284b3ca commit fc7fcdb

21 files changed

+2599
-2386
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7246,25 +7246,25 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
72467246
// (select c, -1, y) -> -c | y
72477247
if (isAllOnesConstant(TrueV)) {
72487248
SDValue Neg = DAG.getNegative(CondV, DL, VT);
7249-
return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
7249+
return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
72507250
}
72517251
// (select c, y, -1) -> (c-1) | y
72527252
if (isAllOnesConstant(FalseV)) {
72537253
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
72547254
DAG.getAllOnesConstant(DL, VT));
7255-
return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
7255+
return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
72567256
}
72577257

72587258
// (select c, 0, y) -> (c-1) & y
72597259
if (isNullConstant(TrueV)) {
72607260
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
72617261
DAG.getAllOnesConstant(DL, VT));
7262-
return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
7262+
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
72637263
}
72647264
// (select c, y, 0) -> -c & y
72657265
if (isNullConstant(FalseV)) {
72667266
SDValue Neg = DAG.getNegative(CondV, DL, VT);
7267-
return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
7267+
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
72687268
}
72697269
}
72707270

@@ -7274,6 +7274,7 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
72747274
const APInt &FalseVal = FalseV->getAsAPIntVal();
72757275
if (~TrueVal == FalseVal) {
72767276
SDValue Neg = DAG.getNegative(CondV, DL, VT);
7277+
FalseV = DAG.getFreeze(FalseV);
72777278
return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
72787279
}
72797280
}
@@ -7289,14 +7290,14 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
72897290
// (select x, x, y) -> x | y
72907291
// (select !x, x, y) -> x & y
72917292
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7292-
return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7293-
FalseV);
7293+
return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, DAG.getFreeze(TrueV),
7294+
DAG.getFreeze(FalseV));
72947295
}
72957296
// (select x, y, x) -> x & y
72967297
// (select !x, y, x) -> x | y
72977298
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7298-
return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
7299-
FalseV);
7299+
return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, DAG.getFreeze(TrueV),
7300+
DAG.getFreeze(FalseV));
73007301
}
73017302
}
73027303

llvm/test/CodeGen/RISCV/alu64.ll

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,8 @@ define i64 @sltiu(i64 %a) nounwind {
5858
; RV32I-LABEL: sltiu:
5959
; RV32I: # %bb.0:
6060
; RV32I-NEXT: sltiu a0, a0, 3
61-
; RV32I-NEXT: seqz a1, a1
61+
; RV32I-NEXT: snez a1, a1
62+
; RV32I-NEXT: addi a1, a1, -1
6263
; RV32I-NEXT: and a0, a1, a0
6364
; RV32I-NEXT: li a1, 0
6465
; RV32I-NEXT: ret

llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
372372
; RV32IA-NEXT: # =>This Loop Header: Depth=1
373373
; RV32IA-NEXT: # Child Loop BB2_3 Depth 2
374374
; RV32IA-NEXT: mv a3, a2
375-
; RV32IA-NEXT: addi a2, a2, 1
376-
; RV32IA-NEXT: sltu a4, a3, a1
377-
; RV32IA-NEXT: neg a4, a4
378-
; RV32IA-NEXT: and a4, a4, a2
375+
; RV32IA-NEXT: addi a4, a2, 1
376+
; RV32IA-NEXT: sltu a2, a2, a1
377+
; RV32IA-NEXT: neg a2, a2
378+
; RV32IA-NEXT: and a4, a2, a4
379379
; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start
380380
; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1
381381
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
@@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
607607
; RV64IA-NEXT: # =>This Loop Header: Depth=1
608608
; RV64IA-NEXT: # Child Loop BB3_3 Depth 2
609609
; RV64IA-NEXT: mv a3, a2
610-
; RV64IA-NEXT: addi a2, a2, 1
611-
; RV64IA-NEXT: sltu a4, a3, a1
612-
; RV64IA-NEXT: neg a4, a4
613-
; RV64IA-NEXT: and a4, a4, a2
610+
; RV64IA-NEXT: addi a4, a2, 1
611+
; RV64IA-NEXT: sltu a2, a2, a1
612+
; RV64IA-NEXT: neg a2, a2
613+
; RV64IA-NEXT: and a4, a2, a4
614614
; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start
615615
; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1
616616
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2

llvm/test/CodeGen/RISCV/bfloat-convert.ll

Lines changed: 96 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -456,121 +456,142 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
456456
define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
457457
; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
458458
; RV32IZFBFMIN: # %bb.0: # %start
459-
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
460-
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
461-
; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
462-
; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
459+
; RV32IZFBFMIN-NEXT: addi sp, sp, -32
460+
; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
461+
; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
462+
; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
463+
; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
464+
; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
465+
; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
466+
; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
467+
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
463468
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
469+
; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0
470+
; RV32IZFBFMIN-NEXT: neg s1, s0
464471
; RV32IZFBFMIN-NEXT: lui a0, 913408
465472
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0
466-
; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0
473+
; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0
474+
; RV32IZFBFMIN-NEXT: neg s3, s2
467475
; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
468476
; RV32IZFBFMIN-NEXT: call __fixsfdi
477+
; RV32IZFBFMIN-NEXT: and a0, s3, a0
478+
; RV32IZFBFMIN-NEXT: or a0, s1, a0
479+
; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
480+
; RV32IZFBFMIN-NEXT: neg a2, a2
469481
; RV32IZFBFMIN-NEXT: lui a4, 524288
470-
; RV32IZFBFMIN-NEXT: lui a2, 524288
471-
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2
482+
; RV32IZFBFMIN-NEXT: li a5, 1
483+
; RV32IZFBFMIN-NEXT: lui a3, 524288
484+
; RV32IZFBFMIN-NEXT: bne s2, a5, .LBB10_2
472485
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
473-
; RV32IZFBFMIN-NEXT: mv a2, a1
486+
; RV32IZFBFMIN-NEXT: mv a3, a1
474487
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
475-
; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
476-
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
477-
; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0
478-
; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4
488+
; RV32IZFBFMIN-NEXT: and a0, a2, a0
489+
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4
479490
; RV32IZFBFMIN-NEXT: # %bb.3:
480-
; RV32IZFBFMIN-NEXT: addi a2, a4, -1
491+
; RV32IZFBFMIN-NEXT: addi a3, a4, -1
481492
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
482-
; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0
483-
; RV32IZFBFMIN-NEXT: neg a4, a1
484-
; RV32IZFBFMIN-NEXT: and a1, a4, a2
485-
; RV32IZFBFMIN-NEXT: neg a2, a3
486-
; RV32IZFBFMIN-NEXT: neg a3, s0
487-
; RV32IZFBFMIN-NEXT: and a0, a3, a0
488-
; RV32IZFBFMIN-NEXT: or a0, a2, a0
489-
; RV32IZFBFMIN-NEXT: and a0, a4, a0
490-
; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
491-
; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
492-
; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
493-
; RV32IZFBFMIN-NEXT: addi sp, sp, 16
493+
; RV32IZFBFMIN-NEXT: and a1, a2, a3
494+
; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
495+
; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
496+
; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
497+
; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
498+
; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
499+
; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
500+
; RV32IZFBFMIN-NEXT: addi sp, sp, 32
494501
; RV32IZFBFMIN-NEXT: ret
495502
;
496503
; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
497504
; R32IDZFBFMIN: # %bb.0: # %start
498-
; R32IDZFBFMIN-NEXT: addi sp, sp, -16
499-
; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
500-
; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
505+
; R32IDZFBFMIN-NEXT: addi sp, sp, -32
506+
; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
507+
; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
508+
; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
509+
; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
510+
; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
501511
; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
512+
; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
513+
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
502514
; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
515+
; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0
516+
; R32IDZFBFMIN-NEXT: neg s1, s0
503517
; R32IDZFBFMIN-NEXT: lui a0, 913408
504518
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0
505-
; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0
519+
; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0
520+
; R32IDZFBFMIN-NEXT: neg s3, s2
506521
; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
507522
; R32IDZFBFMIN-NEXT: call __fixsfdi
523+
; R32IDZFBFMIN-NEXT: and a0, s3, a0
524+
; R32IDZFBFMIN-NEXT: or a0, s1, a0
525+
; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
526+
; R32IDZFBFMIN-NEXT: neg a2, a2
508527
; R32IDZFBFMIN-NEXT: lui a4, 524288
509-
; R32IDZFBFMIN-NEXT: lui a2, 524288
510-
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2
528+
; R32IDZFBFMIN-NEXT: li a5, 1
529+
; R32IDZFBFMIN-NEXT: lui a3, 524288
530+
; R32IDZFBFMIN-NEXT: bne s2, a5, .LBB10_2
511531
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
512-
; R32IDZFBFMIN-NEXT: mv a2, a1
532+
; R32IDZFBFMIN-NEXT: mv a3, a1
513533
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
514-
; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
515-
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
516-
; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0
517-
; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4
534+
; R32IDZFBFMIN-NEXT: and a0, a2, a0
535+
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4
518536
; R32IDZFBFMIN-NEXT: # %bb.3:
519-
; R32IDZFBFMIN-NEXT: addi a2, a4, -1
537+
; R32IDZFBFMIN-NEXT: addi a3, a4, -1
520538
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
521-
; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0
522-
; R32IDZFBFMIN-NEXT: neg a4, a1
523-
; R32IDZFBFMIN-NEXT: and a1, a4, a2
524-
; R32IDZFBFMIN-NEXT: neg a2, a3
525-
; R32IDZFBFMIN-NEXT: neg a3, s0
526-
; R32IDZFBFMIN-NEXT: and a0, a3, a0
527-
; R32IDZFBFMIN-NEXT: or a0, a2, a0
528-
; R32IDZFBFMIN-NEXT: and a0, a4, a0
529-
; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
530-
; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
539+
; R32IDZFBFMIN-NEXT: and a1, a2, a3
540+
; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
541+
; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
542+
; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
543+
; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
544+
; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
531545
; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
532-
; R32IDZFBFMIN-NEXT: addi sp, sp, 16
546+
; R32IDZFBFMIN-NEXT: addi sp, sp, 32
533547
; R32IDZFBFMIN-NEXT: ret
534548
;
535549
; RV32ID-LABEL: fcvt_l_bf16_sat:
536550
; RV32ID: # %bb.0: # %start
537-
; RV32ID-NEXT: addi sp, sp, -16
538-
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
539-
; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
551+
; RV32ID-NEXT: addi sp, sp, -32
552+
; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
553+
; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
554+
; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
555+
; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
556+
; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
540557
; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
558+
; RV32ID-NEXT: lui a0, %hi(.LCPI10_0)
559+
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
541560
; RV32ID-NEXT: fmv.x.w a0, fa0
542561
; RV32ID-NEXT: slli a0, a0, 16
543562
; RV32ID-NEXT: fmv.w.x fs0, a0
563+
; RV32ID-NEXT: flt.s s0, fa5, fs0
564+
; RV32ID-NEXT: neg s1, s0
544565
; RV32ID-NEXT: lui a0, 913408
545566
; RV32ID-NEXT: fmv.w.x fa5, a0
546-
; RV32ID-NEXT: fle.s s0, fa5, fs0
567+
; RV32ID-NEXT: fle.s s2, fa5, fs0
568+
; RV32ID-NEXT: neg s3, s2
547569
; RV32ID-NEXT: fmv.s fa0, fs0
548570
; RV32ID-NEXT: call __fixsfdi
571+
; RV32ID-NEXT: and a0, s3, a0
572+
; RV32ID-NEXT: or a0, s1, a0
573+
; RV32ID-NEXT: feq.s a2, fs0, fs0
574+
; RV32ID-NEXT: neg a2, a2
549575
; RV32ID-NEXT: lui a4, 524288
550-
; RV32ID-NEXT: lui a2, 524288
551-
; RV32ID-NEXT: beqz s0, .LBB10_2
576+
; RV32ID-NEXT: li a5, 1
577+
; RV32ID-NEXT: lui a3, 524288
578+
; RV32ID-NEXT: bne s2, a5, .LBB10_2
552579
; RV32ID-NEXT: # %bb.1: # %start
553-
; RV32ID-NEXT: mv a2, a1
580+
; RV32ID-NEXT: mv a3, a1
554581
; RV32ID-NEXT: .LBB10_2: # %start
555-
; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
556-
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
557-
; RV32ID-NEXT: flt.s a3, fa5, fs0
558-
; RV32ID-NEXT: beqz a3, .LBB10_4
582+
; RV32ID-NEXT: and a0, a2, a0
583+
; RV32ID-NEXT: beqz s0, .LBB10_4
559584
; RV32ID-NEXT: # %bb.3:
560-
; RV32ID-NEXT: addi a2, a4, -1
585+
; RV32ID-NEXT: addi a3, a4, -1
561586
; RV32ID-NEXT: .LBB10_4: # %start
562-
; RV32ID-NEXT: feq.s a1, fs0, fs0
563-
; RV32ID-NEXT: neg a4, a1
564-
; RV32ID-NEXT: and a1, a4, a2
565-
; RV32ID-NEXT: neg a2, a3
566-
; RV32ID-NEXT: neg a3, s0
567-
; RV32ID-NEXT: and a0, a3, a0
568-
; RV32ID-NEXT: or a0, a2, a0
569-
; RV32ID-NEXT: and a0, a4, a0
570-
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
571-
; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
587+
; RV32ID-NEXT: and a1, a2, a3
588+
; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
589+
; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
590+
; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
591+
; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
592+
; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
572593
; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
573-
; RV32ID-NEXT: addi sp, sp, 16
594+
; RV32ID-NEXT: addi sp, sp, 32
574595
; RV32ID-NEXT: ret
575596
;
576597
; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat:
@@ -654,7 +675,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
654675
; CHECK32ZFBFMIN-NEXT: neg s0, a0
655676
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa5, zero
656677
; CHECK32ZFBFMIN-NEXT: fle.s a0, fa5, fa0
657-
; CHECK32ZFBFMIN-NEXT: neg s1, a0
678+
; CHECK32ZFBFMIN-NEXT: xori a0, a0, 1
679+
; CHECK32ZFBFMIN-NEXT: addi s1, a0, -1
658680
; CHECK32ZFBFMIN-NEXT: call __fixunssfdi
659681
; CHECK32ZFBFMIN-NEXT: and a0, s1, a0
660682
; CHECK32ZFBFMIN-NEXT: or a0, s0, a0
@@ -681,7 +703,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
681703
; RV32ID-NEXT: neg s0, a0
682704
; RV32ID-NEXT: fmv.w.x fa5, zero
683705
; RV32ID-NEXT: fle.s a0, fa5, fa0
684-
; RV32ID-NEXT: neg s1, a0
706+
; RV32ID-NEXT: xori a0, a0, 1
707+
; RV32ID-NEXT: addi s1, a0, -1
685708
; RV32ID-NEXT: call __fixunssfdi
686709
; RV32ID-NEXT: and a0, s1, a0
687710
; RV32ID-NEXT: or a0, s0, a0

0 commit comments

Comments
 (0)