Skip to content

Commit c1d7c1b

Browse files
committed
[RISCV] Lower VP_SELECT constant false to use vmerge.vxm/vmerge.vim
Currently, when the false operand of a vp_select is a splat vector, it is lowered to a vmv_v_x/vmv_v_i. MachineLICM hoists the vmv out of the loop, leaving a whole vector copy in the loop body. By inverting the mask register and swapping the true and false values in the vp_select, we can eliminate some instructions inside the loop. Current: https://godbolt.org/z/EnGMn3xeM Expected similar form: https://godbolt.org/z/nWhGM6Ej5
1 parent dca2b26 commit c1d7c1b

File tree

4 files changed

+51
-6
lines changed

4 files changed

+51
-6
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8170,11 +8170,17 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
81708170
return lowerRESET_FPENV(Op, DAG);
81718171
case ISD::EH_DWARF_CFA:
81728172
return lowerEH_DWARF_CFA(Op, DAG);
8173+
case ISD::VP_SELECT:
8174+
if (SDValue Op2 = Op.getOperand(2);
8175+
Op2.hasOneUse() && (Op2.getOpcode() == ISD::SPLAT_VECTOR ||
8176+
Op2.getOpcode() == ISD::SPLAT_VECTOR_PARTS))
8177+
return lowerVPSelectConstantFalse(Op, DAG);
8178+
else
8179+
return lowerVPOp(Op, DAG);
81738180
case ISD::VP_MERGE:
81748181
if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
81758182
return lowerVPMergeMask(Op, DAG);
81768183
[[fallthrough]];
8177-
case ISD::VP_SELECT:
81788184
case ISD::VP_ADD:
81798185
case ISD::VP_SUB:
81808186
case ISD::VP_MUL:
@@ -13176,6 +13182,22 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
1317613182
return convertFromScalableVector(VT, Result, DAG, Subtarget);
1317713183
}
1317813184

13185+
// Lower a VP_SELECT by inverting its mask and swapping its two value operands,
// producing a VMERGE_VL. LowerOperation dispatches here only when operand 2
// (the false value) is a single-use SPLAT_VECTOR or SPLAT_VECTOR_PARTS.
//
// Operands read from Op: 0 = mask, 1 = true value, 2 = false value, 3 = VL.
// Returns the VMERGE_VL node, of the same type as Op.
SDValue RISCVTargetLowering::lowerVPSelectConstantFalse(SDValue Op,
13186+
SelectionDAG &DAG) const {
13187+
SDLoc DL(Op);
13188+
MVT VT = Op.getSimpleValueType();
13189+
SDValue TrueVal = Op.getOperand(1);
13190+
SDValue FalseVal = Op.getOperand(2);
13191+
SDValue VL = Op.getOperand(3);
13192+
13193+
// Build the inverted mask as (mask XOR all-ones), both under the same VL.
MVT MaskVT = VT.changeVectorElementType(MVT::i1);
13194+
SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
13195+
SDValue NewMask = DAG.getNode(RISCVISD::VMXOR_VL, DL, MaskVT,
13196+
Op.getOperand(0), AllOneMask, VL);
13197+
// The value operands are swapped to compensate for the inverted mask; the
// passthru operand is undef.
// NOTE(review): assumes VMERGE_VL takes (mask, true, false, passthru, VL) and
// that VT is a scalable vector type here (no fixed-length container
// conversion is done) - confirm.
return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, NewMask, FalseVal, TrueVal,
13198+
DAG.getUNDEF(VT), VL);
13199+
}
13200+
1317913201
SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
1318013202
SelectionDAG &DAG) const {
1318113203
SDLoc DL(Op);

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,7 @@ class RISCVTargetLowering : public TargetLowering {
477477
SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
478478

479479
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
480+
SDValue lowerVPSelectConstantFalse(SDValue Op, SelectionDAG &DAG) const;
480481
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
481482
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
482483
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ define <vscale x 1 x i8> @masked_load_passthru_nxv1i8(ptr %a, <vscale x 1 x i1>
3434
; ZVE32: # %bb.0:
3535
; ZVE32-NEXT: csrr a1, vlenb
3636
; ZVE32-NEXT: srli a1, a1, 3
37-
; ZVE32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
38-
; ZVE32-NEXT: vmv.v.i v8, 0
39-
; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
37+
; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
4038
; ZVE32-NEXT: vle8.v v8, (a0), v0.t
39+
; ZVE32-NEXT: vmnot.m v0, v0
40+
; ZVE32-NEXT: vmerge.vim v8, v8, 0, v0
4141
; ZVE32-NEXT: ret
4242
%load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> zeroinitializer)
4343
ret <vscale x 1 x i8> %load

llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,28 @@ define <vscale x 2 x i64> @select_nxv2i64(<vscale x 2 x i1> %a, <vscale x 2 x i6
470470
ret <vscale x 2 x i64> %v
471471
}
472472

473+
define <vscale x 2 x i64> @select_nxv2i64_constant_true(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, i32 zeroext %evl) {
474+
; CHECK-LABEL: select_nxv2i64_constant_true:
475+
; CHECK: # %bb.0:
476+
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
477+
; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
478+
; CHECK-NEXT: ret
479+
%v = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %a, <vscale x 2 x i64> splat (i64 -1), <vscale x 2 x i64> %b, i32 %evl)
480+
ret <vscale x 2 x i64> %v
481+
}
482+
483+
define <vscale x 2 x i64> @select_nxv2i64_constant_false(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, i32 zeroext %evl) {
484+
; CHECK-LABEL: select_nxv2i64_constant_false:
485+
; CHECK: # %bb.0:
486+
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
487+
; CHECK-NEXT: vmnot.m v0, v0
488+
; CHECK-NEXT: li a0, 100
489+
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
490+
; CHECK-NEXT: ret
491+
%v = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> splat (i64 100), i32 %evl)
492+
ret <vscale x 2 x i64> %v
493+
}
494+
473495
declare <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32)
474496

475497
define <vscale x 4 x i64> @select_nxv4i64(<vscale x 4 x i1> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c, i32 zeroext %evl) {
@@ -702,10 +724,10 @@ define <vscale x 16 x double> @select_nxv16f64(<vscale x 16 x i1> %a, <vscale x
702724
; CHECK-NEXT: and a4, a5, a4
703725
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
704726
; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
705-
; CHECK-NEXT: bltu a2, a1, .LBB48_2
727+
; CHECK-NEXT: bltu a2, a1, .LBB50_2
706728
; CHECK-NEXT: # %bb.1:
707729
; CHECK-NEXT: mv a2, a1
708-
; CHECK-NEXT: .LBB48_2:
730+
; CHECK-NEXT: .LBB50_2:
709731
; CHECK-NEXT: vmv1r.v v0, v7
710732
; CHECK-NEXT: addi a0, sp, 16
711733
; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload

0 commit comments

Comments
 (0)