Skip to content

Commit 6a2778c

Browse files
committed
[VPlan] Add transformation to narrow interleave groups.
This patch adds a new narrowInterleaveGroups transform, which tries to convert a plan with interleave groups with VF elements to a plan that instead replaces the interleave groups with wide loads and stores processing VF elements. This effectively is a very simple form of loop-aware SLP, where we use interleave groups to identify candidates. This initial version is quite restricted and hopefully serves as a starting point for how to best model those kinds of transforms. Depends on #106431. Fixes #82936.
1 parent 4568bc6 commit 6a2778c

File tree

5 files changed

+162
-49
lines changed

5 files changed

+162
-49
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -7487,6 +7487,9 @@ LoopVectorizationPlanner::executePlan(
74877487

74887488
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
74897489

7490+
if (VPlanTransforms::narrowInterleaveGroups(BestVPlan, BestVF)) {
7491+
LLVM_DEBUG(dbgs() << "Narrowed interleave\n");
7492+
}
74907493
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF
74917494
<< ", UF=" << BestUF << '\n');
74927495
BestVPlan.setName("Final VPlan");

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ extern cl::opt<unsigned> ForceTargetInstructionCost;
4747

4848
bool VPRecipeBase::mayWriteToMemory() const {
4949
switch (getVPDefID()) {
50+
case VPInstructionSC: {
51+
return !Instruction::isBinaryOp(cast<VPInstruction>(this)->getOpcode());
52+
}
5053
case VPInterleaveSC:
5154
return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
5255
case VPWidenStoreEVLSC:
@@ -62,6 +65,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
6265
case VPBranchOnMaskSC:
6366
case VPScalarIVStepsSC:
6467
case VPPredInstPHISC:
68+
case VPVectorPointerSC:
6569
return false;
6670
case VPBlendSC:
6771
case VPReductionEVLSC:

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

+128
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,7 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
672672
void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
673673
unsigned BestUF,
674674
PredicatedScalarEvolution &PSE) {
675+
675676
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
676677
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
677678
VPBasicBlock *ExitingVPBB =
@@ -713,6 +714,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
713714
// TODO: Further simplifications are possible
714715
// 1. Replace inductions with constants.
715716
// 2. Replace vector loop region with VPBasicBlock.
717+
//
716718
}
717719

718720
/// Sink users of \p FOR after the recipe defining the previous value \p
@@ -1589,3 +1591,129 @@ void VPlanTransforms::createInterleaveGroups(
15891591
}
15901592
}
15911593
}
1594+
1595+
/// Returns true if \p V is a value that the interleave-group narrowing can
/// handle as an operand feeding lane \p Idx of a store interleave group whose
/// lane-0 value is defined by \p R0. Two shapes are accepted:
///  * the result of an unmasked VPWidenLoadRecipe that is also an operand of
///    \p R0, or
///  * member \p Idx of a VPInterleaveRecipe whose group factor equals its
///    number of members.
/// Any other value (including one without a defining recipe) is rejected.
static bool supportedLoad(VPWidenRecipe *R0, VPValue *V, unsigned Idx) {
  auto *Def = V->getDefiningRecipe();

  if (auto *W = dyn_cast_or_null<VPWidenLoadRecipe>(Def)) {
    if (W->getMask())
      return false;
    // Scan R0's operand list instead of indexing operands 0 and 1 directly,
    // so a unary R0 does not cause an out-of-range operand access.
    return any_of(R0->operands(), [V](VPValue *Op) { return Op == V; });
  }

  if (auto *IR = dyn_cast_or_null<VPInterleaveRecipe>(Def)) {
    const auto *IG = IR->getInterleaveGroup();
    return IG->getFactor() == IG->getNumMembers() && IR->getVPValue(Idx) == V;
  }

  return false;
}
1609+
1610+
/// Returns true of \p IR is a consecutive interleave group with \p VF members.
1611+
static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *IR,
1612+
ElementCount VF) {
1613+
if (!IR)
1614+
return false;
1615+
auto IG = IR->getInterleaveGroup();
1616+
return IG->getFactor() == IG->getNumMembers() &&
1617+
IG->getNumMembers() == VF.getKnownMinValue();
1618+
}
1619+
1620+
bool VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF) {
1621+
using namespace llvm::VPlanPatternMatch;
1622+
if (VF.isScalable())
1623+
return false;
1624+
1625+
bool Changed = false;
1626+
SmallVector<VPInterleaveRecipe *> StoreGroups;
1627+
for (auto &R : make_early_inc_range(
1628+
*Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
1629+
if (match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
1630+
isa<VPCanonicalIVPHIRecipe>(&R))
1631+
continue;
1632+
1633+
// Bail out on recipes not supported at the moment:
1634+
// * phi recipes other than the canonical induction
1635+
// * recipes writing to memory except interleave groups
1636+
// Only support plans with a canonical induction phi.
1637+
if ((R.isPhi() && !isa<VPCanonicalIVPHIRecipe>(&R)) ||
1638+
(R.mayWriteToMemory() && !isa<VPInterleaveRecipe>(&R)))
1639+
return false;
1640+
1641+
auto *IR = dyn_cast<VPInterleaveRecipe>(&R);
1642+
if (!IR)
1643+
continue;
1644+
1645+
if (!isConsecutiveInterleaveGroup(IR, VF))
1646+
return false;
1647+
if (IR->getStoredValues().empty())
1648+
continue;
1649+
1650+
auto *Lane0 = dyn_cast_or_null<VPWidenRecipe>(
1651+
IR->getStoredValues()[0]->getDefiningRecipe());
1652+
if (!Lane0)
1653+
return false;
1654+
for (const auto &[I, V] : enumerate(IR->getStoredValues())) {
1655+
auto *R = dyn_cast<VPWidenRecipe>(V->getDefiningRecipe());
1656+
if (!R || R->getOpcode() != Lane0->getOpcode())
1657+
return false;
1658+
// Work around captured structured bindings being a C++20 extension.
1659+
auto Idx = I;
1660+
if (any_of(R->operands(), [Lane0, Idx](VPValue *V) {
1661+
return !supportedLoad(Lane0, V, Idx);
1662+
}))
1663+
return false;
1664+
}
1665+
1666+
StoreGroups.push_back(IR);
1667+
}
1668+
1669+
// Narrow operation tree rooted at store groups.
1670+
for (auto *StoreGroup : StoreGroups) {
1671+
auto *Lane0 = cast<VPWidenRecipe>(
1672+
StoreGroup->getStoredValues()[0]->getDefiningRecipe());
1673+
1674+
unsigned LoadGroupIdx =
1675+
isa<VPInterleaveRecipe>(Lane0->getOperand(1)->getDefiningRecipe()) ? 1
1676+
: 0;
1677+
unsigned WideLoadIdx = 1 - LoadGroupIdx;
1678+
auto *LoadGroup = cast<VPInterleaveRecipe>(
1679+
Lane0->getOperand(LoadGroupIdx)->getDefiningRecipe());
1680+
1681+
auto *WideLoad = cast<VPWidenLoadRecipe>(
1682+
Lane0->getOperand(WideLoadIdx)->getDefiningRecipe());
1683+
1684+
// Narrow wide load to uniform scalar load, as transformed VPlan will only
1685+
// process one original iteration.
1686+
auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(),
1687+
WideLoad->operands(), true);
1688+
// Narrow interleave group to wide load, as transformed VPlan will only
1689+
// process one original iteration.
1690+
auto *L = new VPWidenLoadRecipe(
1691+
*cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
1692+
LoadGroup->getAddr(), LoadGroup->getMask(), true, false,
1693+
LoadGroup->getDebugLoc());
1694+
L->insertBefore(LoadGroup);
1695+
N->insertBefore(LoadGroup);
1696+
Lane0->setOperand(LoadGroupIdx, L);
1697+
Lane0->setOperand(WideLoadIdx, N);
1698+
1699+
auto *S = new VPWidenStoreRecipe(
1700+
*cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),
1701+
StoreGroup->getAddr(), Lane0, nullptr, true, false,
1702+
StoreGroup->getDebugLoc());
1703+
S->insertBefore(StoreGroup);
1704+
StoreGroup->eraseFromParent();
1705+
Changed = true;
1706+
}
1707+
1708+
if (!Changed)
1709+
return false;
1710+
1711+
// Adjust induction to reflect that the transformed plan only processes one
1712+
// original iteration.
1713+
auto *CanIV = Plan.getCanonicalIV();
1714+
VPInstruction *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
1715+
Inc->setOperand(
1716+
1, Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
1717+
removeDeadRecipes(Plan);
1718+
return true;
1719+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

+2
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ struct VPlanTransforms {
116116
const SmallPtrSetImpl<const InterleaveGroup<Instruction> *>
117117
&InterleaveGroups,
118118
VPRecipeBuilder &RecipeBuilder, bool ScalarEpilogueAllowed);
119+
120+
static bool narrowInterleaveGroups(VPlan &Plan, ElementCount VF);
119121
};
120122

121123
} // namespace llvm

0 commit comments

Comments
 (0)