Skip to content

Commit 73472c5

Browse files
committed
[SelectionDAG] Treat CopyFromReg as freezing the value (#85932)
The description of CopyFromReg in ISDOpcodes.h says that the input value is defined outside the scope of the current SelectionDAG. I think that means that we can basically treat it as a FREEZE, in the sense that it can be seen as being neither undef nor poison. Being able to fold freeze(CopyFromReg) into CopyFromReg seems useful to avoid regressions if we start to introduce freeze instructions in DAGCombiner/foldBoolSelectToLogic, e.g. to solve #84653. Things _not_ dealt with in this patch: - Depending on the calling convention, an input argument can also be passed on the stack and not in a register. If it is allowed to treat an argument received in a register as not being poison, then I think we want to treat arguments received on the stack the same way. But then we need to attribute load instructions, or add an explicit FREEZE when lowering formal arguments. - A common pattern is that there is an AssertZext or AssertSext just after CopyFromReg. I think that if we treat CopyFromReg as never being poison, then it should be allowed to fold freeze(AssertZext(CopyFromReg)) -> AssertZext(CopyFromReg).
1 parent 431be86 commit 73472c5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1856
-1919
lines changed

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ enum NodeType {
205205
/// CopyFromReg - This node indicates that the input value is a virtual or
206206
/// physical register that is defined outside of the scope of this
207207
/// SelectionDAG. The register is available from the RegisterSDNode object.
208+
/// Note that CopyFromReg is considered as also freezing the value.
208209
CopyFromReg,
209210

210211
/// UNDEF - An undefined node.

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5063,6 +5063,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
50635063
case ISD::VALUETYPE:
50645064
case ISD::FrameIndex:
50655065
case ISD::TargetFrameIndex:
5066+
case ISD::CopyFromReg:
50665067
return true;
50675068

50685069
case ISD::UNDEF:

llvm/test/CodeGen/AArch64/combine-mul.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@ define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
4444
define i64 @combine_mul_self_demandedbits(i64 %x) {
4545
; CHECK-LABEL: combine_mul_self_demandedbits:
4646
; CHECK: // %bb.0:
47-
; CHECK-NEXT: mul x8, x0, x0
48-
; CHECK-NEXT: and x0, x8, #0xfffffffffffffffd
47+
; CHECK-NEXT: mul x0, x0, x0
4948
; CHECK-NEXT: ret
5049
%1 = mul i64 %x, %x
5150
%2 = and i64 %1, -3
@@ -77,7 +76,7 @@ define i8 @one_demanded_bit(i8 %x) {
7776
define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
7877
; CHECK-LABEL: one_demanded_bit_splat:
7978
; CHECK: // %bb.0:
80-
; CHECK-NEXT: mov w8, #32
79+
; CHECK-NEXT: mov w8, #32 // =0x20
8180
; CHECK-NEXT: shl v0.2d, v0.2d, #5
8281
; CHECK-NEXT: dup v1.2d, x8
8382
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@@ -131,7 +130,7 @@ define i32 @squared_demanded_2_low_bits(i32 %x) {
131130
define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
132131
; CHECK-LABEL: squared_demanded_2_low_bits_splat:
133132
; CHECK: // %bb.0:
134-
; CHECK-NEXT: mov x8, #-2
133+
; CHECK-NEXT: mov x8, #-2 // =0xfffffffffffffffe
135134
; CHECK-NEXT: dup v1.2d, x8
136135
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
137136
; CHECK-NEXT: ret

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,9 @@ define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
123123
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
124124
; CHECK-NEXT: xvld $xr0, $a0, 0
125125
; CHECK-NEXT: xvst $xr0, $sp, 0
126-
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
127-
; CHECK-NEXT: addi.d $a3, $sp, 0
128-
; CHECK-NEXT: bstrins.d $a3, $a0, 4, 0
129-
; CHECK-NEXT: st.b $a2, $a3, 0
126+
; CHECK-NEXT: addi.d $a0, $sp, 0
127+
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0
128+
; CHECK-NEXT: st.b $a2, $a0, 0
130129
; CHECK-NEXT: xvld $xr0, $sp, 0
131130
; CHECK-NEXT: xvst $xr0, $a1, 0
132131
; CHECK-NEXT: addi.d $sp, $fp, -64
@@ -150,10 +149,9 @@ define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
150149
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
151150
; CHECK-NEXT: xvld $xr0, $a0, 0
152151
; CHECK-NEXT: xvst $xr0, $sp, 0
153-
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
154-
; CHECK-NEXT: addi.d $a3, $sp, 0
155-
; CHECK-NEXT: bstrins.d $a3, $a0, 4, 1
156-
; CHECK-NEXT: st.h $a2, $a3, 0
152+
; CHECK-NEXT: addi.d $a0, $sp, 0
153+
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1
154+
; CHECK-NEXT: st.h $a2, $a0, 0
157155
; CHECK-NEXT: xvld $xr0, $sp, 0
158156
; CHECK-NEXT: xvst $xr0, $a1, 0
159157
; CHECK-NEXT: addi.d $sp, $fp, -64
@@ -177,10 +175,9 @@ define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
177175
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
178176
; CHECK-NEXT: xvld $xr0, $a0, 0
179177
; CHECK-NEXT: xvst $xr0, $sp, 0
180-
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
181-
; CHECK-NEXT: addi.d $a3, $sp, 0
182-
; CHECK-NEXT: bstrins.d $a3, $a0, 4, 2
183-
; CHECK-NEXT: st.w $a2, $a3, 0
178+
; CHECK-NEXT: addi.d $a0, $sp, 0
179+
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2
180+
; CHECK-NEXT: st.w $a2, $a0, 0
184181
; CHECK-NEXT: xvld $xr0, $sp, 0
185182
; CHECK-NEXT: xvst $xr0, $a1, 0
186183
; CHECK-NEXT: addi.d $sp, $fp, -64
@@ -204,10 +201,9 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
204201
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
205202
; CHECK-NEXT: xvld $xr0, $a0, 0
206203
; CHECK-NEXT: xvst $xr0, $sp, 0
207-
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
208-
; CHECK-NEXT: addi.d $a3, $sp, 0
209-
; CHECK-NEXT: bstrins.d $a3, $a0, 4, 3
210-
; CHECK-NEXT: st.d $a2, $a3, 0
204+
; CHECK-NEXT: addi.d $a0, $sp, 0
205+
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3
206+
; CHECK-NEXT: st.d $a2, $a0, 0
211207
; CHECK-NEXT: xvld $xr0, $sp, 0
212208
; CHECK-NEXT: xvst $xr0, $a1, 0
213209
; CHECK-NEXT: addi.d $sp, $fp, -64
@@ -231,10 +227,9 @@ define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwin
231227
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
232228
; CHECK-NEXT: xvld $xr1, $a0, 0
233229
; CHECK-NEXT: xvst $xr1, $sp, 0
234-
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
235-
; CHECK-NEXT: addi.d $a2, $sp, 0
236-
; CHECK-NEXT: bstrins.d $a2, $a0, 4, 2
237-
; CHECK-NEXT: fst.s $fa0, $a2, 0
230+
; CHECK-NEXT: addi.d $a0, $sp, 0
231+
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
232+
; CHECK-NEXT: fst.s $fa0, $a0, 0
238233
; CHECK-NEXT: xvld $xr0, $sp, 0
239234
; CHECK-NEXT: xvst $xr0, $a1, 0
240235
; CHECK-NEXT: addi.d $sp, $fp, -64
@@ -258,10 +253,9 @@ define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounw
258253
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
259254
; CHECK-NEXT: xvld $xr1, $a0, 0
260255
; CHECK-NEXT: xvst $xr1, $sp, 0
261-
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
262-
; CHECK-NEXT: addi.d $a2, $sp, 0
263-
; CHECK-NEXT: bstrins.d $a2, $a0, 4, 3
264-
; CHECK-NEXT: fst.d $fa0, $a2, 0
256+
; CHECK-NEXT: addi.d $a0, $sp, 0
257+
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
258+
; CHECK-NEXT: fst.d $fa0, $a0, 0
265259
; CHECK-NEXT: xvld $xr0, $sp, 0
266260
; CHECK-NEXT: xvst $xr0, $a1, 0
267261
; CHECK-NEXT: addi.d $sp, $fp, -64

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,9 @@ define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
8787
; CHECK-NEXT: addi.d $sp, $sp, -16
8888
; CHECK-NEXT: vld $vr0, $a0, 0
8989
; CHECK-NEXT: vst $vr0, $sp, 0
90-
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
91-
; CHECK-NEXT: addi.d $a3, $sp, 0
92-
; CHECK-NEXT: bstrins.d $a3, $a0, 3, 0
93-
; CHECK-NEXT: st.b $a2, $a3, 0
90+
; CHECK-NEXT: addi.d $a0, $sp, 0
91+
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0
92+
; CHECK-NEXT: st.b $a2, $a0, 0
9493
; CHECK-NEXT: vld $vr0, $sp, 0
9594
; CHECK-NEXT: vst $vr0, $a1, 0
9695
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -107,10 +106,9 @@ define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
107106
; CHECK-NEXT: addi.d $sp, $sp, -16
108107
; CHECK-NEXT: vld $vr0, $a0, 0
109108
; CHECK-NEXT: vst $vr0, $sp, 0
110-
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
111-
; CHECK-NEXT: addi.d $a3, $sp, 0
112-
; CHECK-NEXT: bstrins.d $a3, $a0, 3, 1
113-
; CHECK-NEXT: st.h $a2, $a3, 0
109+
; CHECK-NEXT: addi.d $a0, $sp, 0
110+
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1
111+
; CHECK-NEXT: st.h $a2, $a0, 0
114112
; CHECK-NEXT: vld $vr0, $sp, 0
115113
; CHECK-NEXT: vst $vr0, $a1, 0
116114
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -127,10 +125,9 @@ define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
127125
; CHECK-NEXT: addi.d $sp, $sp, -16
128126
; CHECK-NEXT: vld $vr0, $a0, 0
129127
; CHECK-NEXT: vst $vr0, $sp, 0
130-
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
131-
; CHECK-NEXT: addi.d $a3, $sp, 0
132-
; CHECK-NEXT: bstrins.d $a3, $a0, 3, 2
133-
; CHECK-NEXT: st.w $a2, $a3, 0
128+
; CHECK-NEXT: addi.d $a0, $sp, 0
129+
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2
130+
; CHECK-NEXT: st.w $a2, $a0, 0
134131
; CHECK-NEXT: vld $vr0, $sp, 0
135132
; CHECK-NEXT: vst $vr0, $a1, 0
136133
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -147,10 +144,9 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
147144
; CHECK-NEXT: addi.d $sp, $sp, -16
148145
; CHECK-NEXT: vld $vr0, $a0, 0
149146
; CHECK-NEXT: vst $vr0, $sp, 0
150-
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
151-
; CHECK-NEXT: addi.d $a3, $sp, 0
152-
; CHECK-NEXT: bstrins.d $a3, $a0, 3, 3
153-
; CHECK-NEXT: st.d $a2, $a3, 0
147+
; CHECK-NEXT: addi.d $a0, $sp, 0
148+
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3
149+
; CHECK-NEXT: st.d $a2, $a0, 0
154150
; CHECK-NEXT: vld $vr0, $sp, 0
155151
; CHECK-NEXT: vst $vr0, $a1, 0
156152
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -167,10 +163,9 @@ define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwi
167163
; CHECK-NEXT: addi.d $sp, $sp, -16
168164
; CHECK-NEXT: vld $vr1, $a0, 0
169165
; CHECK-NEXT: vst $vr1, $sp, 0
170-
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
171-
; CHECK-NEXT: addi.d $a2, $sp, 0
172-
; CHECK-NEXT: bstrins.d $a2, $a0, 3, 2
173-
; CHECK-NEXT: fst.s $fa0, $a2, 0
166+
; CHECK-NEXT: addi.d $a0, $sp, 0
167+
; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2
168+
; CHECK-NEXT: fst.s $fa0, $a0, 0
174169
; CHECK-NEXT: vld $vr0, $sp, 0
175170
; CHECK-NEXT: vst $vr0, $a1, 0
176171
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -187,10 +182,9 @@ define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) noun
187182
; CHECK-NEXT: addi.d $sp, $sp, -16
188183
; CHECK-NEXT: vld $vr1, $a0, 0
189184
; CHECK-NEXT: vst $vr1, $sp, 0
190-
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
191-
; CHECK-NEXT: addi.d $a2, $sp, 0
192-
; CHECK-NEXT: bstrins.d $a2, $a0, 3, 3
193-
; CHECK-NEXT: fst.d $fa0, $a2, 0
185+
; CHECK-NEXT: addi.d $a0, $sp, 0
186+
; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3
187+
; CHECK-NEXT: fst.d $fa0, $a0, 0
194188
; CHECK-NEXT: vld $vr0, $sp, 0
195189
; CHECK-NEXT: vst $vr0, $a1, 0
196190
; CHECK-NEXT: addi.d $sp, $sp, 16

llvm/test/CodeGen/RISCV/alu64.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ define i64 @sltiu(i64 %a) nounwind {
5757
;
5858
; RV32I-LABEL: sltiu:
5959
; RV32I: # %bb.0:
60-
; RV32I-NEXT: seqz a1, a1
6160
; RV32I-NEXT: sltiu a0, a0, 3
61+
; RV32I-NEXT: seqz a1, a1
6262
; RV32I-NEXT: and a0, a1, a0
6363
; RV32I-NEXT: li a1, 0
6464
; RV32I-NEXT: ret

llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
372372
; RV32IA-NEXT: # =>This Loop Header: Depth=1
373373
; RV32IA-NEXT: # Child Loop BB2_3 Depth 2
374374
; RV32IA-NEXT: mv a3, a2
375-
; RV32IA-NEXT: addi a4, a2, 1
376-
; RV32IA-NEXT: sltu a2, a2, a1
377-
; RV32IA-NEXT: neg a2, a2
378-
; RV32IA-NEXT: and a4, a2, a4
375+
; RV32IA-NEXT: addi a2, a2, 1
376+
; RV32IA-NEXT: sltu a4, a3, a1
377+
; RV32IA-NEXT: neg a4, a4
378+
; RV32IA-NEXT: and a4, a4, a2
379379
; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start
380380
; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1
381381
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
@@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
607607
; RV64IA-NEXT: # =>This Loop Header: Depth=1
608608
; RV64IA-NEXT: # Child Loop BB3_3 Depth 2
609609
; RV64IA-NEXT: mv a3, a2
610-
; RV64IA-NEXT: addi a4, a2, 1
611-
; RV64IA-NEXT: sltu a2, a2, a1
612-
; RV64IA-NEXT: neg a2, a2
613-
; RV64IA-NEXT: and a4, a2, a4
610+
; RV64IA-NEXT: addi a2, a2, 1
611+
; RV64IA-NEXT: sltu a4, a3, a1
612+
; RV64IA-NEXT: neg a4, a4
613+
; RV64IA-NEXT: and a4, a4, a2
614614
; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start
615615
; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1
616616
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2

llvm/test/CodeGen/RISCV/bfloat-convert.ll

Lines changed: 48 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -456,92 +456,80 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
456456
define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
457457
; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
458458
; RV32IZFBFMIN: # %bb.0: # %start
459-
; RV32IZFBFMIN-NEXT: addi sp, sp, -32
460-
; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
461-
; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
462-
; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
463-
; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
464-
; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
465-
; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
466-
; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
467-
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
459+
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
460+
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
461+
; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
462+
; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
468463
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
469-
; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0
470-
; RV32IZFBFMIN-NEXT: neg s1, s0
471464
; RV32IZFBFMIN-NEXT: lui a0, 913408
472465
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0
473-
; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0
474-
; RV32IZFBFMIN-NEXT: neg s3, s2
466+
; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0
475467
; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
476468
; RV32IZFBFMIN-NEXT: call __fixsfdi
477-
; RV32IZFBFMIN-NEXT: and a0, s3, a0
478-
; RV32IZFBFMIN-NEXT: or a0, s1, a0
479-
; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
480-
; RV32IZFBFMIN-NEXT: neg a2, a2
481469
; RV32IZFBFMIN-NEXT: lui a4, 524288
482-
; RV32IZFBFMIN-NEXT: lui a3, 524288
483-
; RV32IZFBFMIN-NEXT: beqz s2, .LBB10_2
470+
; RV32IZFBFMIN-NEXT: lui a2, 524288
471+
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2
484472
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
485-
; RV32IZFBFMIN-NEXT: mv a3, a1
473+
; RV32IZFBFMIN-NEXT: mv a2, a1
486474
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
487-
; RV32IZFBFMIN-NEXT: and a0, a2, a0
488-
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4
475+
; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
476+
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
477+
; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0
478+
; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4
489479
; RV32IZFBFMIN-NEXT: # %bb.3:
490-
; RV32IZFBFMIN-NEXT: addi a3, a4, -1
480+
; RV32IZFBFMIN-NEXT: addi a2, a4, -1
491481
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
492-
; RV32IZFBFMIN-NEXT: and a1, a2, a3
493-
; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
494-
; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
495-
; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
496-
; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
497-
; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
498-
; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
499-
; RV32IZFBFMIN-NEXT: addi sp, sp, 32
482+
; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0
483+
; RV32IZFBFMIN-NEXT: neg a4, a1
484+
; RV32IZFBFMIN-NEXT: and a1, a4, a2
485+
; RV32IZFBFMIN-NEXT: neg a2, a3
486+
; RV32IZFBFMIN-NEXT: neg a3, s0
487+
; RV32IZFBFMIN-NEXT: and a0, a3, a0
488+
; RV32IZFBFMIN-NEXT: or a0, a2, a0
489+
; RV32IZFBFMIN-NEXT: and a0, a4, a0
490+
; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
491+
; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
492+
; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
493+
; RV32IZFBFMIN-NEXT: addi sp, sp, 16
500494
; RV32IZFBFMIN-NEXT: ret
501495
;
502496
; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
503497
; R32IDZFBFMIN: # %bb.0: # %start
504-
; R32IDZFBFMIN-NEXT: addi sp, sp, -32
505-
; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
506-
; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
507-
; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
508-
; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
509-
; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
498+
; R32IDZFBFMIN-NEXT: addi sp, sp, -16
499+
; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
500+
; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
510501
; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
511-
; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
512-
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
513502
; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
514-
; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0
515-
; R32IDZFBFMIN-NEXT: neg s1, s0
516503
; R32IDZFBFMIN-NEXT: lui a0, 913408
517504
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0
518-
; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0
519-
; R32IDZFBFMIN-NEXT: neg s3, s2
505+
; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0
520506
; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
521507
; R32IDZFBFMIN-NEXT: call __fixsfdi
522-
; R32IDZFBFMIN-NEXT: and a0, s3, a0
523-
; R32IDZFBFMIN-NEXT: or a0, s1, a0
524-
; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
525-
; R32IDZFBFMIN-NEXT: neg a2, a2
526508
; R32IDZFBFMIN-NEXT: lui a4, 524288
527-
; R32IDZFBFMIN-NEXT: lui a3, 524288
528-
; R32IDZFBFMIN-NEXT: beqz s2, .LBB10_2
509+
; R32IDZFBFMIN-NEXT: lui a2, 524288
510+
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2
529511
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
530-
; R32IDZFBFMIN-NEXT: mv a3, a1
512+
; R32IDZFBFMIN-NEXT: mv a2, a1
531513
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
532-
; R32IDZFBFMIN-NEXT: and a0, a2, a0
533-
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4
514+
; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
515+
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
516+
; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0
517+
; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4
534518
; R32IDZFBFMIN-NEXT: # %bb.3:
535-
; R32IDZFBFMIN-NEXT: addi a3, a4, -1
519+
; R32IDZFBFMIN-NEXT: addi a2, a4, -1
536520
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
537-
; R32IDZFBFMIN-NEXT: and a1, a2, a3
538-
; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
539-
; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
540-
; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
541-
; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
542-
; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
521+
; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0
522+
; R32IDZFBFMIN-NEXT: neg a4, a1
523+
; R32IDZFBFMIN-NEXT: and a1, a4, a2
524+
; R32IDZFBFMIN-NEXT: neg a2, a3
525+
; R32IDZFBFMIN-NEXT: neg a3, s0
526+
; R32IDZFBFMIN-NEXT: and a0, a3, a0
527+
; R32IDZFBFMIN-NEXT: or a0, a2, a0
528+
; R32IDZFBFMIN-NEXT: and a0, a4, a0
529+
; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
530+
; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
543531
; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
544-
; R32IDZFBFMIN-NEXT: addi sp, sp, 32
532+
; R32IDZFBFMIN-NEXT: addi sp, sp, 16
545533
; R32IDZFBFMIN-NEXT: ret
546534
;
547535
; RV32ID-LABEL: fcvt_l_bf16_sat:

0 commit comments

Comments
 (0)