Commit 86b32c4

[RISCV] Match strided load via DAG combine (#66800)
This change matches a masked.strided.load from a mgather node whose index operand is a strided sequence. We can reuse the VID matching from build_vector lowering for this purpose. Note that this duplicates the matching done at the IR level by RISCVGatherScatterLowering.cpp. Now that we can widen gathers to a wider SEW, I don't see a good way to remove this duplication. The only obvious alternative is to move the widening transform to IR, but that's a no-go as I want other DAG combines to run first. I think we should just live with the duplication, particularly since the reuse of isSimpleVIDSequence means the duplication is somewhat minimal.
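For context, this is the shape of gather the combine fires on, sketched in LLVM IR. The body below is paraphrased from the mgather_strided_2xSEW test updated in this commit, not copied verbatim: a splat base pointer plus a constant index vector whose byte offsets come in adjacent pairs, so the DAG first widens the gather to SEW=32, and the widened index becomes a strided sequence with step 8.

declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x i16>)

define <8 x i16> @strided_gather_sketch(ptr %base) {
  ; Splat the scalar base pointer across all eight lanes.
  %head = insertelement <8 x ptr> poison, ptr %base, i32 0
  %splat = shufflevector <8 x ptr> %head, <8 x ptr> poison, <8 x i32> zeroinitializer
  ; i16 indices <0,1,4,5,8,9,12,13> are byte offsets <0,2,8,10,16,18,24,26>:
  ; pairs of adjacent i16s every 8 bytes, i.e. a stride-8 load at e32.
  %ptrs = getelementptr inbounds i16, <8 x ptr> %splat, <8 x i64> <i64 0, i64 1, i64 4, i64 5, i64 8, i64 9, i64 12, i64 13>
  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> poison)
  ret <8 x i16> %v
}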
1 parent 98d5ab7 commit 86b32c4

File tree: 2 files changed, +61 −48 lines


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 29 additions & 0 deletions
@@ -14055,6 +14055,35 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                            MGN->getBasePtr(), Index, ScaleOp},
                           MGN->getMemOperand(), IndexType, MGN->getExtensionType());
 
+    if (Index.getOpcode() == ISD::BUILD_VECTOR &&
+        MGN->getExtensionType() == ISD::NON_EXTLOAD) {
+      if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index);
+          SimpleVID && SimpleVID->StepDenominator == 1) {
+        const int64_t StepNumerator = SimpleVID->StepNumerator;
+        const int64_t Addend = SimpleVID->Addend;
+
+        // Note: We don't need to check alignment here since (by assumption
+        // from the existence of the gather), our offsets must be sufficiently
+        // aligned.
+
+        const EVT PtrVT = getPointerTy(DAG.getDataLayout());
+        assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
+        assert(IndexType == ISD::UNSIGNED_SCALED);
+        SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
+                                      DAG.getConstant(Addend, DL, PtrVT));
+
+        SDVTList VTs = DAG.getVTList({VT, MVT::Other});
+        SDValue IntID =
+            DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
+                                  XLenVT);
+        SDValue Ops[] =
+            {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
+             DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
+        return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
+                                       Ops, VT, MGN->getMemOperand());
+      }
+    }
+
     SmallVector<int> ShuffleMask;
     if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
         matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
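To make the decomposition concrete, here is a hypothetical worked example mirroring the mgather_reverse_strided_2xSEW test below, rendered at the IR level for readability (the combine itself operates on SelectionDAG nodes, not IR): a byte-offset index vector <28, 20, 12, 4> matches isSimpleVIDSequence with Addend = 28, StepNumerator = -8, and StepDenominator = 1, so the gather is rebuilt as a strided load from base + 28 with a stride of -8 bytes, matching the operand order in the Ops[] array above (passthru, base, stride, mask).

declare <4 x i32> @llvm.riscv.masked.strided.load.v4i32.p0.i64(<4 x i32>, ptr, i64, <4 x i1>)

define <4 x i32> @strided_load_sketch(ptr %base) {
  ; BasePtr = base + Addend; the stride operand carries StepNumerator.
  %p = getelementptr i8, ptr %base, i64 28
  %v = call <4 x i32> @llvm.riscv.masked.strided.load.v4i32.p0.i64(
           <4 x i32> poison, ptr %p, i64 -8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret <4 x i32> %v
}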

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

Lines changed: 32 additions & 48 deletions
@@ -13567,20 +13567,16 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
 define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
 ; RV32-LABEL: mgather_strided_2xSEW:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT:    vid.v v8
-; RV32-NEXT:    vsll.vi v9, v8, 3
-; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT:    vluxei8.v v8, (a0), v9
+; RV32-NEXT:    li a1, 8
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vlse32.v v8, (a0), a1
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_strided_2xSEW:
 ; RV64V:       # %bb.0:
-; RV64V-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV64V-NEXT:    vid.v v8
-; RV64V-NEXT:    vsll.vi v9, v8, 3
-; RV64V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64V-NEXT:    vluxei8.v v8, (a0), v9
+; RV64V-NEXT:    li a1, 8
+; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-NEXT:    vlse32.v v8, (a0), a1
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_strided_2xSEW:
@@ -13684,22 +13680,18 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
 define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
 ; RV32-LABEL: mgather_strided_2xSEW_with_offset:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT:    vid.v v8
-; RV32-NEXT:    vsll.vi v8, v8, 3
-; RV32-NEXT:    vadd.vi v9, v8, 4
-; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT:    vluxei8.v v8, (a0), v9
+; RV32-NEXT:    addi a0, a0, 4
+; RV32-NEXT:    li a1, 8
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vlse32.v v8, (a0), a1
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_strided_2xSEW_with_offset:
 ; RV64V:       # %bb.0:
-; RV64V-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; RV64V-NEXT:    vid.v v8
-; RV64V-NEXT:    vsll.vi v8, v8, 3
-; RV64V-NEXT:    vadd.vi v9, v8, 4
-; RV64V-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64V-NEXT:    vluxei8.v v8, (a0), v9
+; RV64V-NEXT:    addi a0, a0, 4
+; RV64V-NEXT:    li a1, 8
+; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-NEXT:    vlse32.v v8, (a0), a1
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_strided_2xSEW_with_offset:
@@ -13804,20 +13796,18 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
 define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
 ; RV32-LABEL: mgather_reverse_unit_strided_2xSEW:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 65858
-; RV32-NEXT:    addi a1, a1, -2020
+; RV32-NEXT:    addi a0, a0, 28
+; RV32-NEXT:    li a1, -4
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v9, a1
-; RV32-NEXT:    vluxei8.v v8, (a0), v9
+; RV32-NEXT:    vlse32.v v8, (a0), a1
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_reverse_unit_strided_2xSEW:
 ; RV64V:       # %bb.0:
-; RV64V-NEXT:    lui a1, 65858
-; RV64V-NEXT:    addiw a1, a1, -2020
+; RV64V-NEXT:    addi a0, a0, 28
+; RV64V-NEXT:    li a1, -4
 ; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-NEXT:    vmv.s.x v9, a1
-; RV64V-NEXT:    vluxei8.v v8, (a0), v9
+; RV64V-NEXT:    vlse32.v v8, (a0), a1
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW:
@@ -13922,20 +13912,18 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
 define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
 ; RV32-LABEL: mgather_reverse_strided_2xSEW:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, 16577
-; RV32-NEXT:    addi a1, a1, 1052
+; RV32-NEXT:    addi a0, a0, 28
+; RV32-NEXT:    li a1, -8
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v9, a1
-; RV32-NEXT:    vluxei8.v v8, (a0), v9
+; RV32-NEXT:    vlse32.v v8, (a0), a1
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_reverse_strided_2xSEW:
 ; RV64V:       # %bb.0:
-; RV64V-NEXT:    lui a1, 16577
-; RV64V-NEXT:    addiw a1, a1, 1052
+; RV64V-NEXT:    addi a0, a0, 28
+; RV64V-NEXT:    li a1, -8
 ; RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-NEXT:    vmv.s.x v9, a1
-; RV64V-NEXT:    vluxei8.v v8, (a0), v9
+; RV64V-NEXT:    vlse32.v v8, (a0), a1
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW:
@@ -14386,20 +14374,16 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
 define <8 x i16> @mgather_gather_4xSEW(ptr %base) {
 ; RV32V-LABEL: mgather_gather_4xSEW:
 ; RV32V:       # %bb.0:
-; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; RV32V-NEXT:    vid.v v8
-; RV32V-NEXT:    vsll.vi v9, v8, 4
-; RV32V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; RV32V-NEXT:    vluxei8.v v8, (a0), v9
+; RV32V-NEXT:    li a1, 16
+; RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32V-NEXT:    vlse64.v v8, (a0), a1
 ; RV32V-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_gather_4xSEW:
 ; RV64V:       # %bb.0:
-; RV64V-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; RV64V-NEXT:    vid.v v8
-; RV64V-NEXT:    vsll.vi v9, v8, 4
-; RV64V-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; RV64V-NEXT:    vluxei8.v v8, (a0), v9
+; RV64V-NEXT:    li a1, 16
+; RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64V-NEXT:    vlse64.v v8, (a0), a1
 ; RV64V-NEXT:    ret
 ;
 ; RV32ZVE32F-LABEL: mgather_gather_4xSEW:
