
Commit 3a6cc52

Revert "[RISCV] Shrink vslideup's LMUL when lowering fixed insert_subvector (#65997)"
This reverts commit b5ff71e. As described in #68730, this appears to have exposed an existing liveness issue. Revert to green until we can figure out how to address the root cause. Note: This was not a clean revert. I ended up doing it by hand.
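For context, the reverted change (#65997) made lowerINSERT_SUBVECTOR perform the tail-undisturbed vmv.v.v/vslideup at the smallest LMUL covering the inserted elements rather than at the LMUL of the whole destination register group. A minimal illustration, reassembled from the first test updated below; the IR body is taken from fixed-vectors-insert-subvector.ll, while the intrinsic declaration spelling and the usual -mattr=+v RUN configuration are assumptions here:

; Insert a fixed <2 x i32> at element 0 of a scalable <vscale x 8 x i32>
; (an LMUL=4 register group).
declare <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32>, <2 x i32>, i64)

define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, ptr %svp) {
  %sv = load <2 x i32>, ptr %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 0)
  ret <vscale x 8 x i32> %v
}

; With the reverted patch, the tail-undisturbed copy ran on a single register:
;   vsetivli zero, 2, e32, m1, tu, ma
;   vmv.v.v  v8, v12
; After the revert, it runs on the full m4 group v8..v11:
;   vsetivli zero, 2, e32, m4, tu, ma
;   vmv.v.v  v8, v12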

4 files changed: +244 −228 lines changed


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 0 additions & 17 deletions
@@ -8887,17 +8887,6 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
     return DAG.getBitcast(Op.getValueType(), SubVec);
   }
 
-  // Shrink down Vec so we're performing the slideup on a smaller LMUL.
-  unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
-  MVT OrigContainerVT = ContainerVT;
-  SDValue OrigVec = Vec;
-  if (auto ShrunkVT =
-          getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
-    ContainerVT = *ShrunkVT;
-    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
-                      DAG.getVectorIdxConstant(0, DL));
-  }
-
   SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), SubVec,
                        DAG.getConstant(0, DL, XLenVT));
@@ -8924,12 +8913,6 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
                        SlideupAmt, Mask, VL, Policy);
   }
 
-  // If we performed the slideup on a smaller LMUL, insert the result back
-  // into the rest of the vector.
-  if (ContainerVT != OrigContainerVT)
-    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
-                         SubVec, DAG.getVectorIdxConstant(0, DL));
-
   if (VecVT.isFixedLengthVector())
     SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
   return DAG.getBitcast(Op.getValueType(), SubVec);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll

Lines changed: 24 additions & 21 deletions
@@ -14,7 +14,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, ptr %
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v12, (a0)
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, ma
 ; CHECK-NEXT: vmv.v.v v8, v12
 ; CHECK-NEXT: ret
   %sv = load <2 x i32>, ptr %svp
@@ -27,7 +27,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_2(<vscale x 8 x i32> %vec, ptr %
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v12, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v12, 2
 ; CHECK-NEXT: ret
   %sv = load <2 x i32>, ptr %svp
@@ -40,7 +40,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v12, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v12, 6
 ; CHECK-NEXT: ret
   %sv = load <2 x i32>, ptr %svp
@@ -51,19 +51,22 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %
 define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, ptr %svp) {
 ; LMULMAX2-LABEL: insert_nxv8i32_v8i32_0:
 ; LMULMAX2: # %bb.0:
-; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, tu, ma
-; LMULMAX2-NEXT: vle32.v v8, (a0)
+; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-NEXT: vle32.v v12, (a0)
+; LMULMAX2-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; LMULMAX2-NEXT: vmv.v.v v8, v12
 ; LMULMAX2-NEXT: ret
 ;
 ; LMULMAX1-LABEL: insert_nxv8i32_v8i32_0:
 ; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: addi a1, a0, 16
 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT: vle32.v v12, (a1)
-; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, tu, ma
-; LMULMAX1-NEXT: vle32.v v8, (a0)
-; LMULMAX1-NEXT: vsetivli zero, 8, e32, m2, tu, ma
-; LMULMAX1-NEXT: vslideup.vi v8, v12, 4
+; LMULMAX1-NEXT: vle32.v v12, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
+; LMULMAX1-NEXT: vle32.v v16, (a0)
+; LMULMAX1-NEXT: vsetivli zero, 4, e32, m4, tu, ma
+; LMULMAX1-NEXT: vmv.v.v v8, v12
+; LMULMAX1-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; LMULMAX1-NEXT: vslideup.vi v8, v16, 4
 ; LMULMAX1-NEXT: ret
   %sv = load <8 x i32>, ptr %svp
   %v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 0)
@@ -81,14 +84,14 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, ptr %
 ;
 ; LMULMAX1-LABEL: insert_nxv8i32_v8i32_8:
 ; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi a1, a0, 16
 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT: vle32.v v12, (a0)
-; LMULMAX1-NEXT: addi a0, a0, 16
+; LMULMAX1-NEXT: vle32.v v12, (a1)
 ; LMULMAX1-NEXT: vle32.v v16, (a0)
 ; LMULMAX1-NEXT: vsetivli zero, 12, e32, m4, tu, ma
-; LMULMAX1-NEXT: vslideup.vi v8, v12, 8
+; LMULMAX1-NEXT: vslideup.vi v8, v16, 8
 ; LMULMAX1-NEXT: vsetivli zero, 16, e32, m4, tu, ma
-; LMULMAX1-NEXT: vslideup.vi v8, v16, 12
+; LMULMAX1-NEXT: vslideup.vi v8, v12, 12
 ; LMULMAX1-NEXT: ret
   %sv = load <8 x i32>, ptr %svp
   %v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
@@ -163,7 +166,7 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) {
 ; LMULMAX2-NEXT: vle32.v v8, (a1)
 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; LMULMAX2-NEXT: vle32.v v10, (a0)
-; LMULMAX2-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; LMULMAX2-NEXT: vsetivli zero, 2, e32, m2, tu, ma
 ; LMULMAX2-NEXT: vmv.v.v v10, v8
 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; LMULMAX2-NEXT: vse32.v v10, (a0)
@@ -194,7 +197,7 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
 ; LMULMAX2-NEXT: vle32.v v8, (a1)
 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; LMULMAX2-NEXT: vle32.v v10, (a0)
-; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; LMULMAX2-NEXT: vsetivli zero, 4, e32, m2, tu, ma
 ; LMULMAX2-NEXT: vslideup.vi v10, v8, 2
 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; LMULMAX2-NEXT: vse32.v v10, (a0)
@@ -505,9 +508,9 @@ define void @insert_v2i64_nxv16i64(ptr %psv0, ptr %psv1, <vscale x 16 x i64>* %o
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vle64.v v12, (a1)
-; CHECK-NEXT: vsetivli zero, 6, e64, m4, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 4
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: vsetivli zero, 6, e64, m8, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v16, 4
 ; CHECK-NEXT: vs8r.v v8, (a2)
 ; CHECK-NEXT: ret
   %sv0 = load <2 x i64>, ptr %psv0
@@ -536,7 +539,7 @@ define void @insert_v2i64_nxv16i64_lo2(ptr %psv, <vscale x 16 x i64>* %out) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m8, ta, ma
 ; CHECK-NEXT: vslideup.vi v16, v8, 2
 ; CHECK-NEXT: vs8r.v v16, (a1)
 ; CHECK-NEXT: ret
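A second-order effect of the revert is visible in the insert_v2i64_nxv16i64 checks above: once the slide runs at e64, m8, the destination operand v8 names the register group v8..v15, an LMUL=8 operand must use a register number that is a multiple of 8, and vslideup's destination group may not overlap its source, so the loaded subvector moves from v12 to v16. An annotated sketch of the resulting sequence (instructions as in the checks above, comments added here):

vsetivli zero, 2, e64, m1, ta, ma
vle64.v v8, (a0)              # low <2 x i64> lands in the destination group
vle64.v v16, (a1)             # high <2 x i64>; v12 lies inside v8..v15 and is no longer usable
vsetivli zero, 6, e64, m8, tu, ma
vslideup.vi v8, v16, 4        # destination group v8..v15, source group v16..v23
vs8r.v v8, (a2)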

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll

Lines changed: 40 additions & 40 deletions
@@ -27,13 +27,13 @@ define void @widen_3xv4i16(ptr %x, ptr %z) {
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: addi a2, a0, 8
-; CHECK-NEXT: vle16.v v9, (a2)
+; CHECK-NEXT: vle16.v v10, (a2)
 ; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vle16.v v10, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
 ; CHECK-NEXT: vsetivli zero, 12, e16, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 8
+; CHECK-NEXT: vslideup.vi v8, v12, 8
 ; CHECK-NEXT: vse16.v v8, (a1)
 ; CHECK-NEXT: ret
   %a = load <4 x i16>, ptr %x
@@ -75,17 +75,17 @@ define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) {
 ; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NO-MISALIGN-NEXT: vle8.v v8, (a0)
 ; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 8
-; CHECK-NO-MISALIGN-NEXT: vle8.v v9, (a2)
-; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 16
 ; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a2)
+; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 16
+; CHECK-NO-MISALIGN-NEXT: vle8.v v12, (a2)
 ; CHECK-NO-MISALIGN-NEXT: addi a0, a0, 24
-; CHECK-NO-MISALIGN-NEXT: vle8.v v12, (a0)
-; CHECK-NO-MISALIGN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NO-MISALIGN-NEXT: vle8.v v14, (a0)
+; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v10, 4
 ; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 12, e16, m2, tu, ma
-; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v10, 8
+; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v12, 8
 ; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v12, 12
+; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v14, 12
 ; CHECK-NO-MISALIGN-NEXT: vse16.v v8, (a1)
 ; CHECK-NO-MISALIGN-NEXT: ret
 ;
@@ -188,17 +188,17 @@ define void @strided_constant_mismatch_4xv4i16(ptr %x, ptr %z) {
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: addi a2, a0, 2
-; CHECK-NEXT: vle16.v v9, (a2)
-; CHECK-NEXT: addi a2, a0, 6
 ; CHECK-NEXT: vle16.v v10, (a2)
+; CHECK-NEXT: addi a2, a0, 6
+; CHECK-NEXT: vle16.v v12, (a2)
 ; CHECK-NEXT: addi a0, a0, 8
-; CHECK-NEXT: vle16.v v12, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NEXT: vle16.v v14, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
 ; CHECK-NEXT: vsetivli zero, 12, e16, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 8
+; CHECK-NEXT: vslideup.vi v8, v12, 8
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 12
+; CHECK-NEXT: vslideup.vi v8, v14, 12
 ; CHECK-NEXT: vse16.v v8, (a1)
 ; CHECK-NEXT: ret
   %a = load <4 x i16>, ptr %x
@@ -258,17 +258,17 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) {
 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; RV32-NEXT: vle16.v v8, (a0)
 ; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: vle16.v v9, (a0)
-; RV32-NEXT: add a0, a0, a4
 ; RV32-NEXT: vle16.v v10, (a0)
-; RV32-NEXT: add a0, a0, a2
+; RV32-NEXT: add a0, a0, a4
 ; RV32-NEXT: vle16.v v12, (a0)
-; RV32-NEXT: vsetivli zero, 8, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v8, v9, 4
+; RV32-NEXT: add a0, a0, a2
+; RV32-NEXT: vle16.v v14, (a0)
+; RV32-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; RV32-NEXT: vslideup.vi v8, v10, 4
 ; RV32-NEXT: vsetivli zero, 12, e16, m2, tu, ma
-; RV32-NEXT: vslideup.vi v8, v10, 8
+; RV32-NEXT: vslideup.vi v8, v12, 8
 ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vslideup.vi v8, v12, 12
+; RV32-NEXT: vslideup.vi v8, v14, 12
 ; RV32-NEXT: vse16.v v8, (a1)
 ; RV32-NEXT: ret
 ;
@@ -277,17 +277,17 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) {
 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; RV64-NEXT: vle16.v v8, (a0)
 ; RV64-NEXT: add a0, a0, a2
-; RV64-NEXT: vle16.v v9, (a0)
-; RV64-NEXT: add a0, a0, a3
 ; RV64-NEXT: vle16.v v10, (a0)
-; RV64-NEXT: add a0, a0, a2
+; RV64-NEXT: add a0, a0, a3
 ; RV64-NEXT: vle16.v v12, (a0)
-; RV64-NEXT: vsetivli zero, 8, e16, m1, tu, ma
-; RV64-NEXT: vslideup.vi v8, v9, 4
+; RV64-NEXT: add a0, a0, a2
+; RV64-NEXT: vle16.v v14, (a0)
+; RV64-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; RV64-NEXT: vslideup.vi v8, v10, 4
 ; RV64-NEXT: vsetivli zero, 12, e16, m2, tu, ma
-; RV64-NEXT: vslideup.vi v8, v10, 8
+; RV64-NEXT: vslideup.vi v8, v12, 8
 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vslideup.vi v8, v12, 12
+; RV64-NEXT: vslideup.vi v8, v14, 12
 ; RV64-NEXT: vse16.v v8, (a1)
 ; RV64-NEXT: ret
 ;
@@ -296,17 +296,17 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) {
 ; ZVE64F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVE64F-NEXT: vle16.v v8, (a0)
 ; ZVE64F-NEXT: add a0, a0, a2
-; ZVE64F-NEXT: vle16.v v9, (a0)
-; ZVE64F-NEXT: add a0, a0, a3
 ; ZVE64F-NEXT: vle16.v v10, (a0)
-; ZVE64F-NEXT: add a0, a0, a2
+; ZVE64F-NEXT: add a0, a0, a3
 ; ZVE64F-NEXT: vle16.v v12, (a0)
-; ZVE64F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
-; ZVE64F-NEXT: vslideup.vi v8, v9, 4
+; ZVE64F-NEXT: add a0, a0, a2
+; ZVE64F-NEXT: vle16.v v14, (a0)
+; ZVE64F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; ZVE64F-NEXT: vslideup.vi v8, v10, 4
 ; ZVE64F-NEXT: vsetivli zero, 12, e16, m2, tu, ma
-; ZVE64F-NEXT: vslideup.vi v8, v10, 8
+; ZVE64F-NEXT: vslideup.vi v8, v12, 8
 ; ZVE64F-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVE64F-NEXT: vslideup.vi v8, v12, 12
+; ZVE64F-NEXT: vslideup.vi v8, v14, 12
 ; ZVE64F-NEXT: vse16.v v8, (a1)
 ; ZVE64F-NEXT: ret
   %a = load <4 x i16>, ptr %x
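The same register-group rules explain the renumbering that runs through this file: once the first slideup executes at e16, m2 rather than m1, each of its vector operands names a two-register group, and an LMUL=2 group must start at an even-numbered register, so the scratch sources move from v9/v10/v12 to v10/v12/v14. An annotated sketch from widen_3xv4i16 (instructions as in the checks above, comments added here):

vsetivli zero, 4, e16, mf2, ta, ma
vle16.v v8, (a0)              # first <4 x i16>
addi a2, a0, 8
vle16.v v10, (a2)             # was v9; v9 cannot begin an LMUL=2 group
addi a0, a0, 16
vle16.v v12, (a0)             # was v10; stays clear of the group v10..v11
vsetivli zero, 8, e16, m2, tu, ma   # the reverted patch shrank this slide to m1
vslideup.vi v8, v10, 4        # operands are now the groups v8..v9 and v10..v11
vsetivli zero, 12, e16, m2, tu, ma
vslideup.vi v8, v12, 8
vse16.v v8, (a1)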
