Skip to content

Commit f54ebc7

Browse files
committed
[RISCV] Combine vslideup_vl with known VL to a smaller LMUL
Similar to llvm#66267, we can perform a vslideup_vl on a smaller type if we know the highest lane that will be written to, which can be determined from the VL. This is an alternative to llvm#65997 and llvm#66087.
1 parent c44ca73 commit f54ebc7

12 files changed: +662 −424 lines

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14251,14 +14251,24 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1425114251
if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
1425214252
return V;
1425314253
break;
14254+
case RISCVISD::VSLIDEUP_VL:
1425414255
case RISCVISD::VSLIDEDOWN_VL: {
1425514256
MVT OrigVT = N->getSimpleValueType(0);
1425614257
auto *CVL = dyn_cast<ConstantSDNode>(N->getOperand(4));
14257-
auto *CIdx = dyn_cast<ConstantSDNode>(N->getOperand(2));
14258-
if (!CVL || !CIdx)
14258+
if (!CVL)
1425914259
break;
14260-
unsigned MaxIdx = CVL->getZExtValue() + CIdx->getZExtValue() - 1;
14261-
// We can try and reduce the LMUL that a vslidedown uses if we know where
14260+
14261+
// The maximum index read or written is VL - 1 for vslideup, and VL + offset
14262+
// - 1 for vslidedown.
14263+
unsigned MaxIdx = CVL->getZExtValue() - 1;
14264+
if (N->getOpcode() == RISCVISD::VSLIDEDOWN_VL) {
14265+
auto *COffset = dyn_cast<ConstantSDNode>(N->getOperand(2));
14266+
if (!COffset)
14267+
break;
14268+
MaxIdx += COffset->getZExtValue();
14269+
}
14270+
14271+
// We can try and reduce the LMUL that a vslide* uses if we know where
1426214272
// the maximum index is. For example, if the target has Zvl128b, a
1426314273
// vslidedown of e32 with with an offset of 4 and VL of 2 is only going to
1426414274
// read from the first 2 registers at most. So if we were operating at
@@ -14280,7 +14290,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1428014290
DAG.getNode(RISCVISD::VMSET_VL, SDLoc(N), getMaskTypeFor(*ShrunkVT),
1428114291
N->getOperand(4));
1428214292
SDValue ShrunkSlidedown =
14283-
DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, *ShrunkVT,
14293+
DAG.getNode(N->getOpcode(), DL, *ShrunkVT,
1428414294
{ShrunkPassthru, ShrunkInVec, N->getOperand(2),
1428514295
ShrunkMask, N->getOperand(4), N->getOperand(5)});
1428614296
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigVT, N->getOperand(0),

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ define <64 x i1> @insertelt_v64i1(<64 x i1> %x, i1 %elt) nounwind {
108108
; CHECK-NEXT: vmv.v.i v8, 0
109109
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
110110
; CHECK-NEXT: vmv.s.x v12, a0
111-
; CHECK-NEXT: vsetivli zero, 2, e8, m4, tu, ma
111+
; CHECK-NEXT: vsetivli zero, 2, e8, m1, tu, ma
112112
; CHECK-NEXT: vslideup.vi v8, v12, 1
113113
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
114114
; CHECK-NEXT: vand.vi v8, v8, 1

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_2(<vscale x 8 x i32> %vec, ptr %
2727
; CHECK: # %bb.0:
2828
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
2929
; CHECK-NEXT: vle32.v v12, (a0)
30-
; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma
30+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
3131
; CHECK-NEXT: vslideup.vi v8, v12, 2
3232
; CHECK-NEXT: ret
3333
%sv = load <2 x i32>, ptr %svp
@@ -40,7 +40,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %
4040
; CHECK: # %bb.0:
4141
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
4242
; CHECK-NEXT: vle32.v v12, (a0)
43-
; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma
43+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, ma
4444
; CHECK-NEXT: vslideup.vi v8, v12, 6
4545
; CHECK-NEXT: ret
4646
%sv = load <2 x i32>, ptr %svp
@@ -65,7 +65,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, ptr %
6565
; LMULMAX1-NEXT: vle32.v v16, (a0)
6666
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m4, tu, ma
6767
; LMULMAX1-NEXT: vmv.v.v v8, v12
68-
; LMULMAX1-NEXT: vsetivli zero, 8, e32, m4, tu, ma
68+
; LMULMAX1-NEXT: vsetivli zero, 8, e32, m2, tu, ma
6969
; LMULMAX1-NEXT: vslideup.vi v8, v16, 4
7070
; LMULMAX1-NEXT: ret
7171
%sv = load <8 x i32>, ptr %svp
@@ -197,7 +197,7 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
197197
; LMULMAX2-NEXT: vle32.v v8, (a1)
198198
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
199199
; LMULMAX2-NEXT: vle32.v v10, (a0)
200-
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m2, tu, ma
200+
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, tu, ma
201201
; LMULMAX2-NEXT: vslideup.vi v10, v8, 2
202202
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
203203
; LMULMAX2-NEXT: vse32.v v10, (a0)
@@ -509,7 +509,7 @@ define void @insert_v2i64_nxv16i64(ptr %psv0, ptr %psv1, <vscale x 16 x i64>* %o
509509
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
510510
; CHECK-NEXT: vle64.v v8, (a0)
511511
; CHECK-NEXT: vle64.v v16, (a1)
512-
; CHECK-NEXT: vsetivli zero, 6, e64, m8, tu, ma
512+
; CHECK-NEXT: vsetivli zero, 6, e64, m4, tu, ma
513513
; CHECK-NEXT: vslideup.vi v8, v16, 4
514514
; CHECK-NEXT: vs8r.v v8, (a2)
515515
; CHECK-NEXT: ret
@@ -539,7 +539,7 @@ define void @insert_v2i64_nxv16i64_lo2(ptr %psv, <vscale x 16 x i64>* %out) {
539539
; CHECK: # %bb.0:
540540
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
541541
; CHECK-NEXT: vle64.v v8, (a0)
542-
; CHECK-NEXT: vsetivli zero, 4, e64, m8, ta, ma
542+
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
543543
; CHECK-NEXT: vslideup.vi v16, v8, 2
544544
; CHECK-NEXT: vs8r.v v16, (a1)
545545
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) {
5454
; CHECK-NEXT: li a1, 32
5555
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
5656
; CHECK-NEXT: vmv.s.x v16, a0
57-
; CHECK-NEXT: vsetivli zero, 5, e32, m8, tu, ma
57+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
5858
; CHECK-NEXT: vslideup.vi v8, v16, 4
5959
; CHECK-NEXT: ret
6060
%b = insertelement <32 x i32> %a, i32 %y, i32 4

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll

Lines changed: 36 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -811,9 +811,11 @@ define i64 @explode_4xi64(<4 x i64> %v) {
811811
; RV32-NEXT: vsrl.vx v10, v8, a0
812812
; RV32-NEXT: vmv.x.s a1, v10
813813
; RV32-NEXT: vmv.x.s a2, v8
814+
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
814815
; RV32-NEXT: vslidedown.vi v10, v8, 1
815-
; RV32-NEXT: vsrl.vx v12, v10, a0
816-
; RV32-NEXT: vmv.x.s a3, v12
816+
; RV32-NEXT: vmv.x.s a3, v10
817+
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
818+
; RV32-NEXT: vsrl.vx v10, v10, a0
817819
; RV32-NEXT: vmv.x.s a4, v10
818820
; RV32-NEXT: vslidedown.vi v10, v8, 2
819821
; RV32-NEXT: vsrl.vx v12, v10, a0
@@ -823,12 +825,12 @@ define i64 @explode_4xi64(<4 x i64> %v) {
823825
; RV32-NEXT: vsrl.vx v10, v8, a0
824826
; RV32-NEXT: vmv.x.s a0, v10
825827
; RV32-NEXT: vmv.x.s a7, v8
826-
; RV32-NEXT: add a1, a1, a3
827-
; RV32-NEXT: add a4, a2, a4
828-
; RV32-NEXT: sltu a2, a4, a2
828+
; RV32-NEXT: add a1, a1, a4
829+
; RV32-NEXT: add a3, a2, a3
830+
; RV32-NEXT: sltu a2, a3, a2
829831
; RV32-NEXT: add a1, a1, a2
830-
; RV32-NEXT: add a6, a4, a6
831-
; RV32-NEXT: sltu a2, a6, a4
832+
; RV32-NEXT: add a6, a3, a6
833+
; RV32-NEXT: sltu a2, a6, a3
832834
; RV32-NEXT: add a1, a1, a5
833835
; RV32-NEXT: add a0, a2, a0
834836
; RV32-NEXT: add a1, a1, a0
@@ -875,7 +877,7 @@ define i64 @explode_8xi64(<8 x i64> %v) {
875877
; RV32-NEXT: vsrl.vx v12, v8, a0
876878
; RV32-NEXT: vmv.x.s a1, v12
877879
; RV32-NEXT: vmv.x.s a2, v8
878-
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
880+
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
879881
; RV32-NEXT: vslidedown.vi v12, v8, 1
880882
; RV32-NEXT: vmv.x.s a3, v12
881883
; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
@@ -887,7 +889,9 @@ define i64 @explode_8xi64(<8 x i64> %v) {
887889
; RV32-NEXT: vsrl.vx v16, v12, a0
888890
; RV32-NEXT: vmv.x.s a5, v16
889891
; RV32-NEXT: vmv.x.s a6, v12
892+
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
890893
; RV32-NEXT: vslidedown.vi v12, v8, 3
894+
; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
891895
; RV32-NEXT: vsrl.vx v16, v12, a0
892896
; RV32-NEXT: vmv.x.s a7, v16
893897
; RV32-NEXT: vmv.x.s t0, v12
@@ -1033,7 +1037,7 @@ define i64 @explode_16xi64(<16 x i64> %v) {
10331037
; RV32-NEXT: vmv.x.s a0, v16
10341038
; RV32-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
10351039
; RV32-NEXT: vmv.x.s a0, v8
1036-
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
1040+
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
10371041
; RV32-NEXT: vslidedown.vi v16, v8, 1
10381042
; RV32-NEXT: vmv.x.s a3, v16
10391043
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
@@ -1045,7 +1049,7 @@ define i64 @explode_16xi64(<16 x i64> %v) {
10451049
; RV32-NEXT: vsrl.vx v24, v16, a1
10461050
; RV32-NEXT: vmv.x.s a5, v24
10471051
; RV32-NEXT: vmv.x.s a6, v16
1048-
; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
1052+
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
10491053
; RV32-NEXT: vslidedown.vi v16, v8, 3
10501054
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
10511055
; RV32-NEXT: vsrl.vx v24, v16, a1
@@ -1068,31 +1072,33 @@ define i64 @explode_16xi64(<16 x i64> %v) {
10681072
; RV32-NEXT: vmv.x.s t4, v16
10691073
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
10701074
; RV32-NEXT: vsrl.vx v16, v16, a1
1071-
; RV32-NEXT: vmv.x.s ra, v16
1075+
; RV32-NEXT: vmv.x.s s0, v16
1076+
; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma
10721077
; RV32-NEXT: vslidedown.vi v16, v8, 7
1073-
; RV32-NEXT: vsrl.vx v24, v16, a1
1074-
; RV32-NEXT: vmv.x.s s5, v24
10751078
; RV32-NEXT: vmv.x.s t6, v16
1079+
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
1080+
; RV32-NEXT: vsrl.vx v16, v16, a1
1081+
; RV32-NEXT: vmv.x.s ra, v16
10761082
; RV32-NEXT: vslidedown.vi v16, v8, 8
10771083
; RV32-NEXT: vsrl.vx v24, v16, a1
10781084
; RV32-NEXT: vmv.x.s s6, v24
1079-
; RV32-NEXT: vmv.x.s s0, v16
1085+
; RV32-NEXT: vmv.x.s s1, v16
10801086
; RV32-NEXT: vslidedown.vi v16, v8, 9
10811087
; RV32-NEXT: vsrl.vx v24, v16, a1
10821088
; RV32-NEXT: vmv.x.s s7, v24
1083-
; RV32-NEXT: vmv.x.s s1, v16
1089+
; RV32-NEXT: vmv.x.s s2, v16
10841090
; RV32-NEXT: vslidedown.vi v16, v8, 10
10851091
; RV32-NEXT: vsrl.vx v24, v16, a1
10861092
; RV32-NEXT: vmv.x.s s8, v24
1087-
; RV32-NEXT: vmv.x.s s2, v16
1093+
; RV32-NEXT: vmv.x.s s3, v16
10881094
; RV32-NEXT: vslidedown.vi v16, v8, 11
10891095
; RV32-NEXT: vsrl.vx v24, v16, a1
10901096
; RV32-NEXT: vmv.x.s s9, v24
1091-
; RV32-NEXT: vmv.x.s s3, v16
1097+
; RV32-NEXT: vmv.x.s s4, v16
10921098
; RV32-NEXT: vslidedown.vi v16, v8, 12
10931099
; RV32-NEXT: vsrl.vx v24, v16, a1
10941100
; RV32-NEXT: vmv.x.s s10, v24
1095-
; RV32-NEXT: vmv.x.s s4, v16
1101+
; RV32-NEXT: vmv.x.s s5, v16
10961102
; RV32-NEXT: vslidedown.vi v0, v8, 13
10971103
; RV32-NEXT: vsrl.vx v16, v0, a1
10981104
; RV32-NEXT: vmv.x.s s11, v16
@@ -1121,39 +1127,39 @@ define i64 @explode_16xi64(<16 x i64> %v) {
11211127
; RV32-NEXT: add a0, a0, a1
11221128
; RV32-NEXT: add t2, t1, t2
11231129
; RV32-NEXT: sltu a1, t2, t1
1124-
; RV32-NEXT: add a1, a1, ra
1130+
; RV32-NEXT: add a1, a1, s0
11251131
; RV32-NEXT: add a0, a0, a1
11261132
; RV32-NEXT: add t4, t2, t4
11271133
; RV32-NEXT: sltu a1, t4, t2
1128-
; RV32-NEXT: add a1, a1, s5
1134+
; RV32-NEXT: add a1, a1, ra
11291135
; RV32-NEXT: add a0, a0, a1
11301136
; RV32-NEXT: add t6, t4, t6
11311137
; RV32-NEXT: sltu a1, t6, t4
11321138
; RV32-NEXT: add a1, a1, s6
11331139
; RV32-NEXT: add a0, a0, a1
1134-
; RV32-NEXT: add s0, t6, s0
1135-
; RV32-NEXT: sltu a1, s0, t6
1140+
; RV32-NEXT: add s1, t6, s1
1141+
; RV32-NEXT: sltu a1, s1, t6
11361142
; RV32-NEXT: add a1, a1, s7
11371143
; RV32-NEXT: add a0, a0, a1
1138-
; RV32-NEXT: add s1, s0, s1
1139-
; RV32-NEXT: sltu a1, s1, s0
1140-
; RV32-NEXT: add a1, a1, s8
1141-
; RV32-NEXT: add a0, a0, a1
11421144
; RV32-NEXT: add s2, s1, s2
11431145
; RV32-NEXT: sltu a1, s2, s1
1144-
; RV32-NEXT: add a1, a1, s9
1146+
; RV32-NEXT: add a1, a1, s8
11451147
; RV32-NEXT: add a0, a0, a1
11461148
; RV32-NEXT: add s3, s2, s3
11471149
; RV32-NEXT: sltu a1, s3, s2
1148-
; RV32-NEXT: add a1, a1, s10
1150+
; RV32-NEXT: add a1, a1, s9
11491151
; RV32-NEXT: add a0, a0, a1
11501152
; RV32-NEXT: add s4, s3, s4
11511153
; RV32-NEXT: sltu a1, s4, s3
1154+
; RV32-NEXT: add a1, a1, s10
1155+
; RV32-NEXT: add a0, a0, a1
1156+
; RV32-NEXT: add s5, s4, s5
1157+
; RV32-NEXT: sltu a1, s5, s4
11521158
; RV32-NEXT: add a1, a1, s11
11531159
; RV32-NEXT: add a0, a0, a1
11541160
; RV32-NEXT: vmv.x.s a1, v24
1155-
; RV32-NEXT: add a2, s4, a2
1156-
; RV32-NEXT: sltu a3, a2, s4
1161+
; RV32-NEXT: add a2, s5, a2
1162+
; RV32-NEXT: sltu a3, a2, s5
11571163
; RV32-NEXT: add a1, a3, a1
11581164
; RV32-NEXT: vmv.x.s a3, v16
11591165
; RV32-NEXT: add a0, a0, a1

Comments (0)