Skip to content

Commit ec74ddd

Browse files
committed
[RISCV] Combine VP_SELECT constant false to use vmerge.vxm/vmerge.vim
Currently, when the false operand of a vp_select is a splat vector, it is lowered to a vmv_v_x/vmv_v_i. MachineLICM hoists the vmv out of the loop, but a whole-register copy is left in the loop body. By inverting the mask register and swapping the true and false values in the vp_select, we can eliminate some instructions inside the loop. Current: https://godbolt.org/z/EnGMn3xeM Expected similar form: https://godbolt.org/z/nWhGM6Ej5
1 parent 6e5ee4a commit ec74ddd

File tree

3 files changed

+27
-14
lines changed

3 files changed

+27
-14
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1628,15 +1628,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
16281628
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
16291629
if (Subtarget.hasVInstructions())
16301630
setTargetDAGCombine(
1631-
{ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1632-
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1633-
ISD::SRL, ISD::SHL, ISD::STORE,
1634-
ISD::SPLAT_VECTOR, ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1635-
ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1636-
ISD::MUL, ISD::SDIV, ISD::UDIV,
1637-
ISD::SREM, ISD::UREM, ISD::INSERT_VECTOR_ELT,
1638-
ISD::ABS, ISD::CTPOP, ISD::VECTOR_SHUFFLE,
1639-
ISD::VSELECT, ISD::VECREDUCE_ADD});
1631+
{ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1632+
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1633+
ISD::SRL, ISD::SHL, ISD::STORE,
1634+
ISD::SPLAT_VECTOR, ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1635+
ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1636+
ISD::MUL, ISD::SDIV, ISD::UDIV,
1637+
ISD::SREM, ISD::UREM, ISD::INSERT_VECTOR_ELT,
1638+
ISD::ABS, ISD::CTPOP, ISD::VECTOR_SHUFFLE,
1639+
ISD::VSELECT, ISD::VECREDUCE_ADD, ISD::VP_SELECT});
16401640

16411641
if (Subtarget.hasVendorXTHeadMemPair())
16421642
setTargetDAGCombine({ISD::LOAD, ISD::STORE});
@@ -19732,6 +19732,18 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1973219732
return performSELECTCombine(N, DAG, Subtarget);
1973319733
case ISD::VSELECT:
1973419734
return performVSELECTCombine(N, DAG);
19735+
case ISD::VP_SELECT: {
19736+
if (SDValue Op2 = N->getOperand(2);
19737+
Op2.hasOneUse() && (Op2.getOpcode() == ISD::SPLAT_VECTOR ||
19738+
Op2.getOpcode() == ISD::SPLAT_VECTOR_PARTS)) {
19739+
SDLoc DL(N);
19740+
SDValue Op0 = N->getOperand(0);
19741+
SDValue Val = DAG.getLogicalNOT(DL, Op0, Op0.getValueType());
19742+
return DAG.getNode(ISD::VP_SELECT, DL, N->getValueType(0), Val,
19743+
N->getOperand(2), N->getOperand(1), N->getOperand(3));
19744+
}
19745+
return SDValue();
19746+
}
1973519747
case RISCVISD::CZERO_EQZ:
1973619748
case RISCVISD::CZERO_NEZ: {
1973719749
SDValue Val = N->getOperand(0);

llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,10 @@ define <vscale x 1 x i8> @masked_load_passthru_nxv1i8(ptr %a, <vscale x 1 x i1>
3434
; ZVE32: # %bb.0:
3535
; ZVE32-NEXT: csrr a1, vlenb
3636
; ZVE32-NEXT: srli a1, a1, 3
37-
; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
38-
; ZVE32-NEXT: vmv.v.i v8, 0
37+
; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
3938
; ZVE32-NEXT: vle8.v v8, (a0), v0.t
39+
; ZVE32-NEXT: vmnot.m v0, v0
40+
; ZVE32-NEXT: vmerge.vim v8, v8, 0, v0
4041
; ZVE32-NEXT: ret
4142
%load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> zeroinitializer)
4243
ret <vscale x 1 x i8> %load

llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -483,10 +483,10 @@ define <vscale x 2 x i64> @select_nxv2i64_constant_true(<vscale x 2 x i1> %a, <v
483483
define <vscale x 2 x i64> @select_nxv2i64_constant_false(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, i32 zeroext %evl) {
484484
; CHECK-LABEL: select_nxv2i64_constant_false:
485485
; CHECK: # %bb.0:
486-
; CHECK-NEXT: li a1, 100
487486
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
488-
; CHECK-NEXT: vmv.v.x v10, a1
489-
; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
487+
; CHECK-NEXT: vmnot.m v0, v0
488+
; CHECK-NEXT: li a0, 100
489+
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
490490
; CHECK-NEXT: ret
491491
%v = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> splat (i64 100), i32 %evl)
492492
ret <vscale x 2 x i64> %v

0 commit comments

Comments
 (0)