Skip to content

Commit 188d5c7

Browse files
committed
[RISCV] Add a combine to form masked.store from unit strided store
Add a DAG combine to form a masked.store from a masked_strided_store intrinsic whose stride is equal to the element size. This is the store analog of PR #65674. As seen in the tests, this does pick up a few cases that we'd previously missed due to selection ordering: we match strided stores early, without going through the recently added generic mscatter combines, and thus weren't recognizing the unit-strided store.
1 parent 8f8f449 commit 188d5c7

File tree

4 files changed

+22
-8
lines changed

4 files changed

+22
-8
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14467,6 +14467,24 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1446714467
ISD::UNINDEXED, ISD::NON_EXTLOAD);
1446814468
return SDValue();
1446914469
}
14470+
case Intrinsic::riscv_masked_strided_store: {
14471+
auto *Store = cast<MemIntrinsicSDNode>(N);
14472+
SDValue Value = N->getOperand(2);
14473+
SDValue Base = N->getOperand(3);
14474+
SDValue Stride = N->getOperand(4);
14475+
SDValue Mask = N->getOperand(5);
14476+
14477+
// If the stride is equal to the element size in bytes, we can use
14478+
// a masked.store.
14479+
const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
14480+
if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
14481+
StrideC && StrideC->getZExtValue() == ElementSize)
14482+
return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
14483+
DAG.getUNDEF(XLenVT), Mask,
14484+
Store->getMemoryVT(), Store->getMemOperand(),
14485+
ISD::UNINDEXED, false);
14486+
return SDValue();
14487+
}
1447014488
case Intrinsic::riscv_vcpop:
1447114489
case Intrinsic::riscv_vcpop_mask:
1447214490
case Intrinsic::riscv_vfirst:

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11296,9 +11296,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
1129611296
define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
1129711297
; CHECK-LABEL: mscatter_unit_stride:
1129811298
; CHECK: # %bb.0:
11299-
; CHECK-NEXT: li a1, 2
1130011299
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11301-
; CHECK-NEXT: vsse16.v v8, (a0), a1
11300+
; CHECK-NEXT: vse16.v v8, (a0)
1130211301
; CHECK-NEXT: ret
1130311302
%head = insertelement <8 x i1> poison, i1 true, i16 0
1130411303
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -11311,9 +11310,8 @@ define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
1131111310
; CHECK-LABEL: mscatter_unit_stride_with_offset:
1131211311
; CHECK: # %bb.0:
1131311312
; CHECK-NEXT: addi a0, a0, 10
11314-
; CHECK-NEXT: li a1, 2
1131511313
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11316-
; CHECK-NEXT: vsse16.v v8, (a0), a1
11314+
; CHECK-NEXT: vse16.v v8, (a0)
1131711315
; CHECK-NEXT: ret
1131811316
%head = insertelement <8 x i1> poison, i1 true, i16 0
1131911317
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer

llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,7 @@ define void @stride_one_store(i64 %n, ptr %p) {
114114
; RV64: # %bb.0:
115115
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
116116
; RV64-NEXT: vmv.v.i v8, 0
117-
; RV64-NEXT: li a0, 8
118-
; RV64-NEXT: vsse64.v v8, (a1), a0
117+
; RV64-NEXT: vs1r.v v8, (a1)
119118
; RV64-NEXT: ret
120119
%step = tail call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
121120
%gep = getelementptr inbounds i64, ptr %p, <vscale x 1 x i64> %step

llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,8 @@ define void @strided_store_i8_nostride(ptr %p, <32 x i8> %v, <32 x i1> %m) {
8989
; CHECK-LABEL: strided_store_i8_nostride:
9090
; CHECK: # %bb.0:
9191
; CHECK-NEXT: li a1, 32
92-
; CHECK-NEXT: li a2, 1
9392
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
94-
; CHECK-NEXT: vsse8.v v8, (a0), a2, v0.t
93+
; CHECK-NEXT: vse8.v v8, (a0), v0.t
9594
; CHECK-NEXT: ret
9695
call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 1, <32 x i1> %m)
9796
ret void

0 commit comments

Comments
 (0)