[RISCV] Mark RVV stores and segmented loads as masked pseudo #123106
Conversation
So that we can turn masked operations with all-ones masks into their unmasked counterparts.
@llvm/pr-subscribers-backend-risc-v

Author: Min-Yih Hsu (mshockwave)

Changes: So that we can turn masked operations with all-ones masks into their unmasked counterparts.

Patch is 42.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123106.diff

9 Files Affected:
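Before the diff, a minimal self-contained sketch of the decision the C++ change encodes. The types and helper names below are simplified stand-ins for illustration, not LLVM's real API:

struct PseudoDesc {
  bool MayStore;      // the pseudo writes memory (VSE/VSSE/VSOXEI/VSSEG, ...)
  bool ProducesGPR;   // e.g. VCPOP_M / VFIRST_M, which define a scalar
};

// A masked pseudo has a passthru operand at index 0 unless it is a
// scalar-producing mask op or a store; none of the stores has a passthru.
static bool hasPassthru(const PseudoDesc &D) {
  if (D.ProducesGPR)
    return false;
  return !D.MayStore;
}

// When folding an all-ones-mask pseudo into its unmasked form, skip the
// passthru operand only if the unmasked form does not keep a passthru
// (!UseTUPseudo) and the masked form actually had one.
static bool shouldSkipPassthru(bool UseTUPseudo, const PseudoDesc &D) {
  return !UseTUPseudo && hasPassthru(D);
}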
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9ccf95970e5b53..4603fc4109f1fb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3791,13 +3791,14 @@ static bool isImplicitDef(SDValue V) {
return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}
-static bool hasGPROut(unsigned Opc) {
- switch (RISCV::getRVVMCOpcode(Opc)) {
+static bool hasPassthru(const MCInstrDesc &MCID) {
+ switch (RISCV::getRVVMCOpcode(MCID.getOpcode())) {
case RISCV::VCPOP_M:
case RISCV::VFIRST_M:
- return true;
+ return false;
}
- return false;
+ // None of the stores has passthru.
+ return !MCID.mayStore();
}
// Optimize masked RVV pseudo instructions with a known all-ones mask to their
@@ -3829,8 +3830,9 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
#endif
SmallVector<SDValue, 8> Ops;
- // Skip the passthru operand at index 0 if !UseTUPseudo and no GPR out.
- bool ShouldSkip = !UseTUPseudo && !hasGPROut(Opc);
+ // Skip the passthru operand at index 0 if !UseTUPseudo and has the passthru
+ // operand.
+ bool ShouldSkip = !UseTUPseudo && hasPassthru(MCID);
for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
// Skip the mask, and the Glue.
SDValue Op = N->getOperand(I);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 21eedbb6268466..268bfe70673a2a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1932,6 +1932,7 @@ multiclass VPseudoUSStore {
def "E" # eew # "_V_" # LInfo : VPseudoUSStoreNoMask<vreg, eew>,
VSESched<LInfo>;
def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSStoreMask<vreg, eew>,
+ RISCVMaskedPseudo<MaskIdx=2>,
VSESched<LInfo>;
}
}
@@ -1958,6 +1959,7 @@ multiclass VPseudoSStore {
def "E" # eew # "_V_" # LInfo : VPseudoSStoreNoMask<vreg, eew>,
VSSSched<eew, LInfo>;
def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSStoreMask<vreg, eew>,
+ RISCVMaskedPseudo<MaskIdx=3>,
VSSSched<eew, LInfo>;
}
}
@@ -1984,6 +1986,7 @@ multiclass VPseudoIStore<bit Ordered> {
VSXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
VPseudoIStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered>,
+ RISCVMaskedPseudo<MaskIdx=3>,
VSXSched<dataEEW, Ordered, DataLInfo, IdxLInfo>;
}
}
@@ -3709,7 +3712,8 @@ multiclass VPseudoUSSegLoad {
def nf # "E" # eew # "_V_" # LInfo :
VPseudoUSSegLoadNoMask<vreg, eew, nf>, VLSEGSched<nf, eew, LInfo>;
def nf # "E" # eew # "_V_" # LInfo # "_MASK" :
- VPseudoUSSegLoadMask<vreg, eew, nf>, VLSEGSched<nf, eew, LInfo>;
+ VPseudoUSSegLoadMask<vreg, eew, nf>, RISCVMaskedPseudo<MaskIdx=2>,
+ VLSEGSched<nf, eew, LInfo>;
}
}
}
@@ -3726,7 +3730,8 @@ multiclass VPseudoUSSegLoadFF {
def nf # "E" # eew # "FF_V_" # LInfo :
VPseudoUSSegLoadFFNoMask<vreg, eew, nf>, VLSEGFFSched<nf, eew, LInfo>;
def nf # "E" # eew # "FF_V_" # LInfo # "_MASK" :
- VPseudoUSSegLoadFFMask<vreg, eew, nf>, VLSEGFFSched<nf, eew, LInfo>;
+ VPseudoUSSegLoadFFMask<vreg, eew, nf>, RISCVMaskedPseudo<MaskIdx=2>,
+ VLSEGFFSched<nf, eew, LInfo>;
}
}
}
@@ -3743,6 +3748,7 @@ multiclass VPseudoSSegLoad {
def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegLoadNoMask<vreg, eew, nf>,
VLSSEGSched<nf, eew, LInfo>;
def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegLoadMask<vreg, eew, nf>,
+ RISCVMaskedPseudo<MaskIdx=3>,
VLSSEGSched<nf, eew, LInfo>;
}
}
@@ -3773,6 +3779,7 @@ multiclass VPseudoISegLoad<bit Ordered> {
def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
VPseudoISegLoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
+ RISCVMaskedPseudo<MaskIdx=3>,
VLXSEGSched<nf, dataEEW, Ordered, DataLInfo>;
}
}
@@ -3792,6 +3799,7 @@ multiclass VPseudoUSSegStore {
def nf # "E" # eew # "_V_" # LInfo : VPseudoUSSegStoreNoMask<vreg, eew, nf>,
VSSEGSched<nf, eew, LInfo>;
def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSSegStoreMask<vreg, eew, nf>,
+ RISCVMaskedPseudo<MaskIdx=2>,
VSSEGSched<nf, eew, LInfo>;
}
}
@@ -3809,6 +3817,7 @@ multiclass VPseudoSSegStore {
def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegStoreNoMask<vreg, eew, nf>,
VSSSEGSched<nf, eew, LInfo>;
def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegStoreMask<vreg, eew, nf>,
+ RISCVMaskedPseudo<MaskIdx=3>,
VSSSEGSched<nf, eew, LInfo>;
}
}
@@ -3839,6 +3848,7 @@ multiclass VPseudoISegStore<bit Ordered> {
def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
VPseudoISegStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
+ RISCVMaskedPseudo<MaskIdx=3>,
VSXSEGSched<nf, idxEEW, Ordered, DataLInfo>;
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
index 809ae2d2bebfe3..16e5e7b9199a39 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
@@ -29,6 +29,18 @@ entry:
ret <vscale x 1 x i8> %1
}
+define <vscale x 1 x i8> @test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
+; CHECK-LABEL: test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+ %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
+ ret <vscale x 1 x i8> %1
+}
+
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, i64, i64)
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
@@ -191,6 +203,18 @@ entry:
ret <vscale x 1 x i8> %1
}
+define <vscale x 1 x i8> @test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+ %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
+ ret <vscale x 1 x i8> %1
+}
+
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, i64, i64)
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
@@ -326,6 +350,18 @@ entry:
ret <vscale x 1 x i8> %1
}
+define <vscale x 1 x i8> @test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+ %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
+ ret <vscale x 1 x i8> %1
+}
+
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, i64, i64)
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
@@ -461,6 +497,18 @@ entry:
ret <vscale x 1 x i8> %1
}
+define <vscale x 1 x i8> @test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+ %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
+ ret <vscale x 1 x i8> %1
+}
+
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, i64, i64)
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64, i64)
@@ -569,6 +617,18 @@ entry:
ret <vscale x 1 x i8> %1
}
+define <vscale x 1 x i8> @test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+ %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1)
+ ret <vscale x 1 x i8> %1
+}
+
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, i64, i64)
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64, i64)
@@ -677,6 +737,18 @@ entry:
ret <vscale x 1 x i8> %1
}
+define <vscale x 1 x i8> @test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+ %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1)
+ ret <vscale x 1 x i8> %1
+}
+
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, i64, i64)
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 2 x i1>, i64, i64, i64)
@@ -785,6 +857,18 @@ entry:
ret <vscale x 1 x i8> %1
}
+define <vscale x 1 x i8> @test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+ %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1)
+ ret <vscale x 1 x i8> %1
+}
+
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, i64, i64)
declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64, i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
index 3b9db2655e0338..0f7348b474ee4e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
@@ -31,6 +31,19 @@ entry:
ret <vscale x 1 x i8> %1
}
+define <vscale x 1 x i8> @test_vluxseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i8(ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vluxseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vluxseg2ei8.v v9, (a0), v8
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+ %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
+ ret <vscale x 1 x i8> %1
+}
+
declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vluxseg2.triscv.vector.tuple_nxv1i8_2t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i16>, i64, i64)
declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vluxseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64, i64)
@@ -640,6 +653,19 @@ entry:
ret <vscale x 1 x i8> %1
}
+define <vscale x 1 x i8> @test_vluxseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i8(ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vluxseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vluxseg3ei8.v v9, (a0), v8
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+ %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
+ ret <vscale x 1 x i8> %1
+}
+
declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vluxseg3.triscv.vector.tuple_nxv1i8_3t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i16>, i64, i64)
declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vluxseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64, i64)
@@ -1191,6 +1217,19 @@ entry:
ret <vscale x 1 x i8> %1
}
+define <vscale x 1 x i8> @test_vluxseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t_nxv1i8(ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vluxseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vluxseg4ei8.v v9, (a0), v8
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
+ %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
+ ret <vscale x 1 x i8> %1
+}
+
declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vluxseg4.triscv.vector.tuple_nxv1i8_4t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i16>, i64, i64)
declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vluxseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i16.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i16>, <vscale x 1 x i1>, i64, i64, i64)
@@ -1742,6 +1781,19 @@ entry:
ret <vscale x 1 x i8> %1
}
+define <vscale x 1 x i8> @test_vluxseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t_nxv1i8(ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: test_vluxseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vluxseg5ei8.v v9, (a0), v8
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vluxseg5.mask.triscv.vecto...
[truncated]
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 6716ce8b641f0e42e2343e1694ee578b027be0c4 d5ba103f98b9a9c6baca6a650bfd0b8f1f8d8bb4 llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll llvm/test/CodeGen/RISCV/rvv/vse.ll llvm/test/CodeGen/RISCV/rvv/vsse.ll llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll

The following files introduce new uses of undef:

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. In tests, avoid using undef and having tests rely on undefined behavior. For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
We were previously checking a combination of the vector policy op and the opcode to determine if we needed to skip copying the passthru from a masked pseudo to an unmasked pseudo. However, we can just do this by checking RISCVII::isFirstDefTiedToFirstUse, which is a proxy for whether or not a pseudo has a passthru operand. This should hopefully remove the need for the changes in #123106.
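As a hedged sketch (the actual helper lives in the RISC-V backend headers and may differ in detail), the check described here can be expressed directly against MCInstrDesc: a pseudo has a passthru exactly when its first use operand, the one right after the defs, is tied to its first def.

#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

// Sketch of the "first def tied to first use" proxy: a passthru is modeled
// as a use operand tied to def 0, so this constraint check stands in for
// "this pseudo has a passthru operand".
static bool firstDefTiedToFirstUse(const MCInstrDesc &Desc) {
  return Desc.getNumDefs() < Desc.getNumOperands() &&
         Desc.getOperandConstraint(Desc.getNumDefs(), MCOI::TIED_TO) == 0;
}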
Can you be more specific about this (in the description)? And why not all?
bool ShouldSkip = !UseTUPseudo && !hasGPROut(Opc);
// Skip the passthru operand at index 0 if !UseTUPseudo and has the passthru
// operand.
bool ShouldSkip = !UseTUPseudo && hasPassthru(MCID);
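For context, a schematic view of why the TableGen changes above use MaskIdx=2 for unit-stride forms and MaskIdx=3 for strided and indexed forms. The operand lists below are simplified illustrations, not the exact pseudo definitions:

// Loads carry a passthru at index 0; stores do not, which is why
// hasPassthru() can simply exclude anything that mayStore():
//
//   unit-stride masked load:   passthru, base,        mask, vl, sew, policy
//   unit-stride masked store:  data,     base,        mask, vl, sew
//   strided/indexed store:     data,     base, extra, mask, vl, sew
//
// The mask sits at index 2 in the unit-stride forms and at index 3 when a
// stride or index operand is present, matching the MaskIdx values above.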
I thought it was weird that we were using the vec policy op to determine if the unmasked pseudo had a passthru operand, so I fixed it to use isFirstDefTiedToFirstUse and removed hasGPROut in ec5d17b. Can you rebase on top of it and check if it works for stores now?
I rebased and yes, it works. I also agree it's cleaner this way, thanks.

And do we need any changes in RISCVVectorPeephole? Because I missed that when doing the same thing for …
I think RISCVVectorPeephole should be OK, since after c8ee116 we handle the no-passthru masked pseudos properly, which is what this PR had to change (and was made more explicit in ec5d17b).
I just updated both the title and description.
Oh, for some reason non-segmented loads had already been marked as masked.
LGTM.
LGTM
So that we can turn masked operations with all-ones masks into their unmasked counterparts. Note: loads other than segmented ones had already been marked as masked.