
Commit 5886f0a

Authored by preames and topperc
[RISCV] Allow larger offset when matching build_vector as vid sequence (#144756)
I happened to notice that when legalizing get.active.lane.mask with large vectors we were materializing via constant pool instead of just shifting by a constant. We should probably be doing a full cost comparison for the different lowering strategies as opposed to our current adhoc heuristics, but the few cases this regresses seem pretty minor. (Given the reduction in vset toggles, they might not be regressions at all.) --------- Co-authored-by: Craig Topper <[email protected]>
1 parent 0c2191b commit 5886f0a

File tree

7 files changed (+119, -130 lines)


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 5 additions & 5 deletions
@@ -3724,14 +3724,14 @@ static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
       SplatStepVal = Log2_64(std::abs(StepNumerator));
     }
 
-    // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
-    // threshold since it's the immediate value many RVV instructions accept.
-    // There is no vmul.vi instruction so ensure multiply constant can fit in
-    // a single addi instruction.
+    // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
+    // since it's the immediate value many RVV instructions accept. There is
+    // no vmul.vi instruction so ensure multiply constant can fit in a
+    // single addi instruction. For the addend, we allow up to 32 bits.
     if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
        isPowerOf2_32(StepDenominator) &&
-        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
+        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
       MVT VIDVT =
           VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
       MVT VIDContainerVT =
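The functional change is the final operand check: the addend previously had to fit in a simm5 (the vadd.vi immediate range of [-16, 15]) and may now be any 32-bit value, since it can be materialized in a scalar register and applied with vadd.vx. A small sketch using llvm::isInt<N> from llvm/Support/MathExtras.h, with the addend 100 exercised by the buildvec_vid_plus_nonimm_v16i8 test below:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t Addend = 100; // from <i8 100, i8 101, ..., i8 115>
      assert(!llvm::isInt<5>(Addend)); // old bound: outside [-16, 15], so the
                                       // build_vector went to the constant pool
      assert(llvm::isInt<32>(Addend)); // new bound: accepted, lowered as
                                       // vid.v + li a1, 100 + vadd.vx
      return 0;
    }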

llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll

Lines changed: 54 additions & 76 deletions
@@ -103,18 +103,16 @@ define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) {
 define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-LABEL: fv32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI8_0)
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
 ; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vadd.vx v16, v8, a0
 ; CHECK-NEXT:    vsaddu.vx v8, v8, a1
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v24, v16, a2
 ; CHECK-NEXT:    vmsltu.vx v0, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v16
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v16, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vslideup.vi v0, v16, 2
+; CHECK-NEXT:    vslideup.vi v0, v24, 2
 ; CHECK-NEXT:    ret
   %mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc)
   ret <32 x i1> %mask
@@ -125,30 +123,24 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_0)
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_1)
-; CHECK-NEXT:    vle8.v v17, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI9_2)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_2)
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vle8.v v18, (a0)
-; CHECK-NEXT:    vmsltu.vx v0, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v16
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vsaddu.vx v16, v8, a1
+; CHECK-NEXT:    vmsltu.vx v0, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v24, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v25, v16, a2
+; CHECK-NEXT:    li a0, 48
+; CHECK-NEXT:    vadd.vx v8, v8, a0
 ; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v16, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v17
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v17, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v0, v16, 2
+; CHECK-NEXT:    vslideup.vi v0, v24, 2
 ; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v0, v17, 4
+; CHECK-NEXT:    vslideup.vi v0, v25, 4
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v8, v18
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
 ; CHECK-NEXT:    vmsltu.vx v16, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vslideup.vi v0, v16, 6
@@ -160,63 +152,49 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
 define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-LABEL: fv128:
 ; CHECK: # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_0)
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_1)
-; CHECK-NEXT:    vle8.v v17, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_2)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_2)
-; CHECK-NEXT:    vle8.v v18, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_3)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_3)
 ; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vle8.v v19, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_4)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_4)
-; CHECK-NEXT:    vle8.v v20, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_5)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_5)
-; CHECK-NEXT:    vle8.v v21, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_6)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_6)
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vle8.v v22, (a0)
-; CHECK-NEXT:    vmsltu.vx v0, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v16
+; CHECK-NEXT:    li a0, 80
+; CHECK-NEXT:    vsaddu.vx v16, v8, a1
+; CHECK-NEXT:    vmsltu.vx v0, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v24, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 96
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v25, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 112
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v26, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v27, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v28, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v29, v16, a2
+; CHECK-NEXT:    li a0, 48
+; CHECK-NEXT:    vadd.vx v8, v8, a0
 ; CHECK-NEXT:    vsaddu.vx v8, v8, a1
 ; CHECK-NEXT:    vmsltu.vx v16, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v17
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v17, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v18
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v18, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v19
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v19, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v20
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v20, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v21
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v21, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v22
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v22, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v17, v16, 2
-; CHECK-NEXT:    vslideup.vi v0, v20, 2
+; CHECK-NEXT:    vslideup.vi v25, v24, 2
+; CHECK-NEXT:    vslideup.vi v0, v28, 2
 ; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v17, v18, 4
-; CHECK-NEXT:    vslideup.vi v0, v21, 4
+; CHECK-NEXT:    vslideup.vi v25, v26, 4
+; CHECK-NEXT:    vslideup.vi v0, v29, 4
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v17, v19, 6
-; CHECK-NEXT:    vslideup.vi v0, v22, 6
+; CHECK-NEXT:    vslideup.vi v25, v27, 6
+; CHECK-NEXT:    vslideup.vi v0, v16, 6
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vslideup.vi v0, v17, 8
+; CHECK-NEXT:    vslideup.vi v0, v25, 8
 ; CHECK-NEXT:    ret
   %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
   ret <128 x i1> %mask
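The rewritten fv32/fv64/fv128 sequences compute each 16-lane chunk of the mask directly: vid.v yields the lane indices once, vadd.vx offsets them by the chunk base (16, 32, 48, ...), vsaddu.vx adds the scalar %index with unsigned saturation, and vmsltu.vx compares against %tc before the chunks are concatenated with vslideup.vi. A scalar sketch of one lane, assuming the saturating-add behavior visible in the vsaddu/vmsltu pairs above:

    #include <cstdint>
    #include <limits>

    // Lane (offset + i) of the active lane mask, as lowered above:
    // active iff sat(index + offset + i) < tc.
    bool activeLane(uint64_t index, uint64_t tc, uint64_t offset, uint64_t i) {
      uint64_t sum = index + (offset + i);          // vadd.vx, then vsaddu.vx
      if (sum < index)                              // vsaddu.vx clamps on
        sum = std::numeric_limits<uint64_t>::max(); // unsigned overflow
      return sum < tc;                              // vmsltu.vx
    }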

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll

Lines changed: 10 additions & 6 deletions
@@ -94,21 +94,25 @@ define <2 x bfloat> @vid_v2bf16() {
 define <2 x bfloat> @vid_addend1_v2bf16() {
 ; CHECK-LABEL: vid_addend1_v2bf16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT:    lui a0, 262148
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vsll.vi v8, v8, 7
 ; CHECK-NEXT:    addi a0, a0, -128
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vadd.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   ret <2 x bfloat> <bfloat 1.0, bfloat 2.0>
 }
 
 define <2 x bfloat> @vid_denominator2_v2bf16() {
 ; CHECK-LABEL: vid_denominator2_v2bf16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT:    lui a0, 260100
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    lui a0, 4
+; CHECK-NEXT:    vsll.vi v8, v8, 7
 ; CHECK-NEXT:    addi a0, a0, -256
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
+; CHECK-NEXT:    vadd.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   ret <2 x bfloat> <bfloat 0.5, bfloat 1.0>
 }
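These bf16 vectors are plain vid sequences at the bit level: bfloat 1.0 and 2.0 have bit patterns 0x3F80 and 0x4000, which differ by 0x80 = 1 << 7, so the new code shifts vid.v left by 7 and adds an addend built from lui a0, 4 (0x4000) and addi. A quick check of that arithmetic (illustrative values only):

    #include <cassert>
    #include <cstdint>

    int main() {
      // vid_addend1: (i << 7) + 0x3F80 -> {0x3F80, 0x4000} = {bf16 1.0, 2.0}
      uint16_t addend1 = (4 << 12) - 128; // lui a0, 4; addi a0, a0, -128
      assert(addend1 == 0x3F80 && (uint16_t)((1 << 7) + addend1) == 0x4000);
      // vid_denominator2: (i << 7) + 0x3F00 -> {0x3F00, 0x3F80} = {0.5, 1.0}
      uint16_t addend2 = (4 << 12) - 256; // lui a0, 4; addi a0, a0, -256
      assert(addend2 == 0x3F00 && (uint16_t)((1 << 7) + addend2) == 0x3F80);
      return 0;
    }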

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 24 additions & 16 deletions
@@ -1573,18 +1573,22 @@ define <2 x half> @vid_addend1_v2f16() {
 ;
 ; RV32ZVFHMIN-LABEL: vid_addend1_v2f16:
 ; RV32ZVFHMIN: # %bb.0:
-; RV32ZVFHMIN-NEXT:    lui a0, 262148
-; RV32ZVFHMIN-NEXT:    addi a0, a0, -1024
-; RV32ZVFHMIN-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; RV32ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; RV32ZVFHMIN-NEXT:    vid.v v8
+; RV32ZVFHMIN-NEXT:    li a0, 15
+; RV32ZVFHMIN-NEXT:    vsll.vi v8, v8, 10
+; RV32ZVFHMIN-NEXT:    slli a0, a0, 10
+; RV32ZVFHMIN-NEXT:    vadd.vx v8, v8, a0
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vid_addend1_v2f16:
 ; RV64ZVFHMIN: # %bb.0:
-; RV64ZVFHMIN-NEXT:    lui a0, 262148
-; RV64ZVFHMIN-NEXT:    addi a0, a0, -1024
-; RV64ZVFHMIN-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; RV64ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; RV64ZVFHMIN-NEXT:    vid.v v8
+; RV64ZVFHMIN-NEXT:    li a0, 15
+; RV64ZVFHMIN-NEXT:    vsll.vi v8, v8, 10
+; RV64ZVFHMIN-NEXT:    slli a0, a0, 10
+; RV64ZVFHMIN-NEXT:    vadd.vx v8, v8, a0
 ; RV64ZVFHMIN-NEXT:    ret
   ret <2 x half> <half 1.0, half 2.0>
 }
@@ -1608,18 +1612,22 @@ define <2 x half> @vid_denominator2_v2f16() {
 ;
 ; RV32ZVFHMIN-LABEL: vid_denominator2_v2f16:
 ; RV32ZVFHMIN: # %bb.0:
-; RV32ZVFHMIN-NEXT:    lui a0, 245764
-; RV32ZVFHMIN-NEXT:    addi a0, a0, -2048
-; RV32ZVFHMIN-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; RV32ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; RV32ZVFHMIN-NEXT:    vid.v v8
+; RV32ZVFHMIN-NEXT:    li a0, 7
+; RV32ZVFHMIN-NEXT:    vsll.vi v8, v8, 10
+; RV32ZVFHMIN-NEXT:    slli a0, a0, 11
+; RV32ZVFHMIN-NEXT:    vadd.vx v8, v8, a0
 ; RV32ZVFHMIN-NEXT:    ret
 ;
 ; RV64ZVFHMIN-LABEL: vid_denominator2_v2f16:
 ; RV64ZVFHMIN: # %bb.0:
-; RV64ZVFHMIN-NEXT:    lui a0, 245764
-; RV64ZVFHMIN-NEXT:    addi a0, a0, -2048
-; RV64ZVFHMIN-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVFHMIN-NEXT:    vmv.s.x v8, a0
+; RV64ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; RV64ZVFHMIN-NEXT:    vid.v v8
+; RV64ZVFHMIN-NEXT:    li a0, 7
+; RV64ZVFHMIN-NEXT:    vsll.vi v8, v8, 10
+; RV64ZVFHMIN-NEXT:    slli a0, a0, 11
+; RV64ZVFHMIN-NEXT:    vadd.vx v8, v8, a0
 ; RV64ZVFHMIN-NEXT:    ret
   ret <2 x half> <half 0.5, half 1.0>
 }
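The f16 variants follow the same recipe with half-precision bit patterns (0.5 = 0x3800, 1.0 = 0x3C00, 2.0 = 0x4000), a lane stride of 1 << 10, and the addend materialized with li/slli instead of lui/addi. Verifying the two addends used above:

    #include <cassert>

    int main() {
      // vid_addend1_v2f16: (i << 10) + (15 << 10) -> {0x3C00, 0x4000} = {1.0, 2.0}
      assert((15 << 10) == 0x3C00 && (1 << 10) + (15 << 10) == 0x4000);
      // vid_denominator2_v2f16: (i << 10) + (7 << 11) -> {0x3800, 0x3C00} = {0.5, 1.0}
      assert((7 << 11) == 0x3800 && (1 << 10) + (7 << 11) == 0x3C00);
      return 0;
    }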

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll

Lines changed: 3 additions & 3 deletions
@@ -58,10 +58,10 @@ define void @buildvec_vid_plus_imm_v16i8(ptr %x) {
 define void @buildvec_vid_plus_nonimm_v16i8(ptr %x) {
 ; CHECK-LABEL: buildvec_vid_plus_nonimm_v16i8:
 ; CHECK: # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI4_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI4_0)
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    li a1, 100
+; CHECK-NEXT:    vadd.vx v8, v8, a1
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   store <16 x i8> <i8 100, i8 101, i8 102, i8 103, i8 104, i8 105, i8 106, i8 107, i8 108, i8 109, i8 110, i8 111, i8 112, i8 113, i8 114, i8 115>, ptr %x

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll

Lines changed: 12 additions & 18 deletions
@@ -1734,19 +1734,16 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m,
 ; RV32-NEXT:    .cfi_offset ra, -4
 ; RV32-NEXT:    mv a2, a0
 ; RV32-NEXT:    li a0, 32
-; RV32-NEXT:    lui a3, %hi(.LCPI72_0)
-; RV32-NEXT:    addi a3, a3, %lo(.LCPI72_0)
 ; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV32-NEXT:    vle8.v v12, (a3)
 ; RV32-NEXT:    vid.v v16
-; RV32-NEXT:    vmsltu.vx v14, v16, a1
-; RV32-NEXT:    li a3, 64
-; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vmsltu.vx v12, v16, a1
+; RV32-NEXT:    vadd.vx v16, v16, a0
+; RV32-NEXT:    vmsltu.vx v13, v16, a1
+; RV32-NEXT:    li a1, 64
 ; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslideup.vi v14, v12, 4
-; RV32-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
-; RV32-NEXT:    vmand.mm v0, v14, v0
+; RV32-NEXT:    vslideup.vi v12, v13, 4
+; RV32-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT:    vmand.mm v0, v12, v0
 ; RV32-NEXT:    vmv.v.i v12, 1
 ; RV32-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; RV32-NEXT:    vslidedown.vx v12, v8, a0
@@ -1780,19 +1777,16 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m,
 ; RV64-NEXT:    .cfi_offset ra, -8
 ; RV64-NEXT:    mv a2, a0
 ; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    lui a3, %hi(.LCPI72_0)
-; RV64-NEXT:    addi a3, a3, %lo(.LCPI72_0)
 ; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; RV64-NEXT:    vle8.v v12, (a3)
 ; RV64-NEXT:    vid.v v16
-; RV64-NEXT:    vmsltu.vx v14, v16, a1
-; RV64-NEXT:    li a3, 64
-; RV64-NEXT:    vsext.vf4 v16, v12
 ; RV64-NEXT:    vmsltu.vx v12, v16, a1
+; RV64-NEXT:    vadd.vx v16, v16, a0
+; RV64-NEXT:    vmsltu.vx v13, v16, a1
+; RV64-NEXT:    li a1, 64
 ; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslideup.vi v14, v12, 4
-; RV64-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
-; RV64-NEXT:    vmand.mm v0, v14, v0
+; RV64-NEXT:    vslideup.vi v12, v13, 4
+; RV64-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; RV64-NEXT:    vmand.mm v0, v12, v0
 ; RV64-NEXT:    vmv.v.i v12, 1
 ; RV64-NEXT:    vmerge.vvm v8, v12, v8, v0
 ; RV64-NEXT:    vslidedown.vx v12, v8, a0

llvm/test/CodeGen/RISCV/rvv/vle_vid-vfcvt.ll

Lines changed: 11 additions & 6 deletions
@@ -4,10 +4,12 @@
 define void @foo_1(ptr nocapture noundef writeonly %t) {
 ; CHECK-LABEL: foo_1:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT:    lui a1, %hi(.LCPI0_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI0_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsll.vi v8, v8, 7
+; CHECK-NEXT:    lui a1, 524288
+; CHECK-NEXT:    vadd.vx v8, v8, a1
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
@@ -18,10 +20,13 @@ entry:
 define void @foo_2(ptr nocapture noundef writeonly %t) {
 ; CHECK-LABEL: foo_2:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI1_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    lui a1, 524288
+; CHECK-NEXT:    vsll.vi v8, v8, 7
+; CHECK-NEXT:    addi a1, a1, -512
+; CHECK-NEXT:    vadd.vx v8, v8, a1
+; CHECK-NEXT:    vfcvt.f.x.v v8, v8
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
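Here the float constants themselves are rebuilt in-register: the new code forms an integer vid sequence and converts it with vfcvt.f.x.v instead of loading from .LCPI0_0/.LCPI1_0. A sketch of what foo_1 now computes per element, assuming standard lui semantics (524288 << 12 == 0x80000000, i.e. INT32_MIN) and wrapping 32-bit vadd:

    #include <cstdint>

    // Element i of foo_1's stored vector under the new lowering:
    // vid.v -> i; vsll.vi 7 -> i * 128; vadd.vx adds 0x80000000 (wrapping);
    // vfcvt.f.x.v converts the signed result to float.
    float foo1Element(int32_t i) {
      int32_t bits = INT32_MIN + (i << 7); // lui a1, 524288; vadd.vx
      return (float)bits;                  // vfcvt.f.x.v
    }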
