Skip to content

Commit be5e3c0

Browse files
committed
[VPlan] Simplify vp.merge true, (or x, y), x -> vp.merge y, true, x
With EVL tail folding, an AnyOf reduction will emit a vp.merge like: vp.merge true, (or phi, cond), phi, evl. We can remove the or and optimise this to: vp.merge cond, true, phi, evl. This makes it slightly easier to pattern match in llvm#134898. This adds a pattern matcher for VPWidenIntrinsicRecipe to help match this (only 4-ary intrinsics for now; it can be extended if other users need it). Blended AnyOf reductions will emit an and, which we may also be able to simplify in a later patch.
1 parent 8fddef8 commit be5e3c0

File tree

4 files changed

+44
-8
lines changed

4 files changed

+44
-8
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ struct Recipe_match {
199199
std::is_same<RecipeTy, VPDerivedIVRecipe>::value ||
200200
std::is_same<RecipeTy, VPWidenGEPRecipe>::value)
201201
return DefR;
202+
else if constexpr (std::is_same<RecipeTy, VPWidenIntrinsicRecipe>::value)
203+
return DefR && DefR->getVectorIntrinsicID() == Opcode;
202204
else
203205
return DefR && DefR->getOpcode() == Opcode;
204206
}
@@ -439,6 +441,33 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
439441
return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});
440442
}
441443

444+
template <Intrinsic::ID IntrinsicID, typename... OpTys>
445+
using VPWidenIntrinsicMatch = Recipe_match<std::tuple<OpTys...>, IntrinsicID,
446+
false, VPWidenIntrinsicRecipe>;
447+
448+
template <Intrinsic::ID IntrinsicID, typename Op0_t, typename Op1_t,
449+
typename Op2_t, typename Op3_t>
450+
inline VPWidenIntrinsicMatch<IntrinsicID, Op0_t, Op1_t, Op2_t, Op3_t>
451+
m_WidenIntrinsic(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2,
452+
const Op3_t &Op3) {
453+
return VPWidenIntrinsicMatch<IntrinsicID, Op0_t, Op1_t, Op2_t, Op3_t>(
454+
{Op0, Op1, Op2, Op3});
455+
}
456+
457+
/// Intrinsic matchers.
458+
struct IntrinsicID_match {
459+
unsigned ID;
460+
461+
IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {}
462+
463+
template <typename OpTy> bool match(OpTy *V) {
464+
if (const auto *CI = dyn_cast<CallInst>(V))
465+
if (const auto *F = CI->getCalledFunction())
466+
return F->getIntrinsicID() == ID;
467+
return false;
468+
}
469+
};
470+
442471
} // namespace VPlanPatternMatch
443472
} // namespace llvm
444473

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,17 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10111011
TypeInfo.inferScalarType(R.getOperand(1)) ==
10121012
TypeInfo.inferScalarType(R.getVPSingleValue()))
10131013
return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
1014+
1015+
// For i1 vp.merges produced by AnyOf reductions:
1016+
// vp.merge true, (or x, y), x, evl -> vp.merge y, true, x, evl
1017+
if (match(&R, m_WidenIntrinsic<Intrinsic::vp_merge>(
1018+
m_True(), m_c_BinaryOr(m_VPValue(X), m_VPValue(Y)),
1019+
m_Deferred(X), m_VPValue())) &&
1020+
TypeInfo.inferScalarType(R.getVPSingleValue())->isIntegerTy(1)) {
1021+
R.setOperand(1, R.getOperand(0));
1022+
R.setOperand(0, Y);
1023+
return;
1024+
}
10141025
}
10151026

10161027
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1899,8 +1899,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
18991899
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
19001900
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
19011901
; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
1902-
; IF-EVL-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
1903-
; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
1902+
; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
19041903
; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
19051904
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
19061905
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
@@ -2024,8 +2023,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
20242023
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
20252024
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
20262025
; IF-EVL-NEXT: [[TMP14:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00)
2027-
; IF-EVL-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
2028-
; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
2026+
; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
20292027
; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
20302028
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
20312029
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1953,8 +1953,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
19531953
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
19541954
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
19551955
; IF-EVL-NEXT: [[TMP13:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
1956-
; IF-EVL-NEXT: [[TMP14:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP13]]
1957-
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
1956+
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
19581957
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
19591958
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
19601959
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
@@ -2078,8 +2077,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
20782077
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
20792078
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
20802079
; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00)
2081-
; IF-EVL-NEXT: [[TMP14:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP13]]
2082-
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
2080+
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
20832081
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
20842082
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
20852083
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]

0 commit comments

Comments
 (0)