Skip to content

Commit 8252b0c

Browse files
committed
[VPlan] Add transformation to narrow interleave groups.
This patch adds a new narrowInterleaveGroups transform, which tries to convert a plan with interleave groups with VF elements to a plan that instead replaces the interleave groups with wide loads and stores processing VF elements. This is effectively a very simple form of loop-aware SLP, where we use interleave groups to identify candidates. This initial version is quite restricted and hopefully serves as a starting point for how to best model those kinds of transforms. Depends on #106431. Fixes #82936
1 parent 725a1e7 commit 8252b0c

File tree

5 files changed

+162
-49
lines changed

5 files changed

+162
-49
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7507,6 +7507,9 @@ LoopVectorizationPlanner::executePlan(
75077507
OrigLoop->getHeader()->getModule()->getContext());
75087508
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
75097509

7510+
if (VPlanTransforms::narrowInterleaveGroups(BestVPlan, BestVF)) {
7511+
LLVM_DEBUG(dbgs() << "Narrowed interleave\n");
7512+
}
75107513
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF
75117514
<< ", UF=" << BestUF << '\n');
75127515
BestVPlan.setName("Final VPlan");

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ extern cl::opt<unsigned> ForceTargetInstructionCost;
4848

4949
bool VPRecipeBase::mayWriteToMemory() const {
5050
switch (getVPDefID()) {
51+
case VPInstructionSC: {
52+
return !Instruction::isBinaryOp(cast<VPInstruction>(this)->getOpcode());
53+
}
5154
case VPInterleaveSC:
5255
return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
5356
case VPWidenStoreEVLSC:
@@ -63,6 +66,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
6366
case VPBranchOnMaskSC:
6467
case VPScalarIVStepsSC:
6568
case VPPredInstPHISC:
69+
case VPVectorPointerSC:
6670
return false;
6771
case VPBlendSC:
6872
case VPReductionEVLSC:

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,7 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
668668
void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
669669
unsigned BestUF,
670670
PredicatedScalarEvolution &PSE) {
671+
671672
assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
672673
assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
673674
VPBasicBlock *ExitingVPBB =
@@ -710,6 +711,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
710711
// TODO: Further simplifications are possible
711712
// 1. Replace inductions with constants.
712713
// 2. Replace vector loop region with VPBasicBlock.
714+
//
713715
}
714716

715717
/// Sink users of \p FOR after the recipe defining the previous value \p
@@ -1657,3 +1659,129 @@ void VPlanTransforms::createInterleaveGroups(
16571659
}
16581660
}
16591661
}
1662+
1663+
/// Returns true if \p V is defined by a load that the interleave-group
/// narrowing can handle. Two kinds of definitions are accepted:
///  * an unmasked VPWidenLoadRecipe whose result is also an operand of \p R0,
///    the recipe computing lane 0 of the stored values;
///  * a VPInterleaveRecipe whose group has as many members as its factor and
///    whose value at index \p Idx is \p V.
/// Values without a defining recipe, or defined by any other recipe, are
/// rejected.
static bool supportedLoad(VPWidenRecipe *R0, VPValue *V, unsigned Idx) {
  VPRecipeBase *Def = V->getDefiningRecipe();

  // Search all operands of R0 instead of hard-coding operand 0 and 1, so that
  // a recipe with fewer than two operands is never indexed out of range.
  if (auto *W = dyn_cast_or_null<VPWidenLoadRecipe>(Def))
    return !W->getMask() && is_contained(R0->operands(), V);

  if (auto *IR = dyn_cast_or_null<VPInterleaveRecipe>(Def)) {
    const auto *IG = IR->getInterleaveGroup();
    return IG->getFactor() == IG->getNumMembers() && IR->getVPValue(Idx) == V;
  }
  return false;
}
1677+
1678+
/// Returns true of \p IR is a consecutive interleave group with \p VF members.
1679+
static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *IR,
1680+
ElementCount VF) {
1681+
if (!IR)
1682+
return false;
1683+
auto IG = IR->getInterleaveGroup();
1684+
return IG->getFactor() == IG->getNumMembers() &&
1685+
IG->getNumMembers() == VF.getKnownMinValue();
1686+
}
1687+
1688+
/// Try to replace the interleave groups in the entry block of \p Plan's vector
/// loop region with wide loads and stores, so that one iteration of the
/// transformed plan processes a single original loop iteration.
///
/// The transform only applies to fixed \p VF and bails out (returning false
/// without modifying \p Plan) unless every recipe in the block is supported:
///  * no phi other than the canonical induction,
///  * no recipe writing to memory other than interleave groups,
///  * every interleave group has factor == number of members == VF,
///  * every store group stores values produced by VPWidenRecipes with the same
///    opcode, each taking exactly two operands: one member of a load
///    interleave group and one unmasked wide load shared with lane 0.
///
/// \returns true if at least one store group was narrowed.
bool VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF) {
  using namespace llvm::VPlanPatternMatch;
  if (VF.isScalable())
    return false;

  // Analysis phase: collect the store groups to narrow. Nothing is modified
  // here, so any early return leaves the plan untouched.
  SmallVector<VPInterleaveRecipe *> StoreGroups;
  for (auto &R : *Plan.getVectorLoopRegion()->getEntryBasicBlock()) {
    if (match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
        isa<VPCanonicalIVPHIRecipe>(&R))
      continue;

    // Bail out on recipes not supported at the moment:
    //  * phi recipes other than the canonical induction (skipped above)
    //  * recipes writing to memory except interleave groups
    if (R.isPhi() || (R.mayWriteToMemory() && !isa<VPInterleaveRecipe>(&R)))
      return false;

    auto *IR = dyn_cast<VPInterleaveRecipe>(&R);
    if (!IR)
      continue;

    if (!isConsecutiveInterleaveGroup(IR, VF))
      return false;
    // Load groups are only narrowed as operands of a store group.
    if (IR->getStoredValues().empty())
      continue;

    auto *Lane0 = dyn_cast_or_null<VPWidenRecipe>(
        IR->getStoredValues()[0]->getDefiningRecipe());
    if (!Lane0)
      return false;

    // The rewrite below replaces one operand of Lane0 by a wide load built
    // from a load interleave group and the other by a uniform scalar load
    // built from a wide load. Reject any other shape up front instead of
    // hitting an out-of-range operand access or a failing cast<> later.
    if (Lane0->getNumOperands() != 2)
      return false;
    VPRecipeBase *Op0Def = Lane0->getOperand(0)->getDefiningRecipe();
    VPRecipeBase *Op1Def = Lane0->getOperand(1)->getDefiningRecipe();
    bool GroupThenLoad = isa_and_nonnull<VPInterleaveRecipe>(Op0Def) &&
                         isa_and_nonnull<VPWidenLoadRecipe>(Op1Def);
    bool LoadThenGroup = isa_and_nonnull<VPWidenLoadRecipe>(Op0Def) &&
                         isa_and_nonnull<VPInterleaveRecipe>(Op1Def);
    if (!GroupThenLoad && !LoadThenGroup)
      return false;

    for (const auto &[I, V] : enumerate(IR->getStoredValues())) {
      // Stored values may have no defining recipe, hence dyn_cast_or_null.
      auto *LaneR = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe());
      if (!LaneR || LaneR->getOpcode() != Lane0->getOpcode())
        return false;
      // Work around captured structured bindings being a C++20 extension.
      auto Idx = I;
      if (any_of(LaneR->operands(), [Lane0, Idx](VPValue *Op) {
            return !supportedLoad(Lane0, Op, Idx);
          }))
        return false;
    }

    StoreGroups.push_back(IR);
  }

  if (StoreGroups.empty())
    return false;

  // Narrow operation tree rooted at store groups.
  for (auto *StoreGroup : StoreGroups) {
    auto *Lane0 = cast<VPWidenRecipe>(
        StoreGroup->getStoredValues()[0]->getDefiningRecipe());

    unsigned LoadGroupIdx =
        isa<VPInterleaveRecipe>(Lane0->getOperand(1)->getDefiningRecipe()) ? 1
                                                                           : 0;
    unsigned WideLoadIdx = 1 - LoadGroupIdx;
    auto *LoadGroup = cast<VPInterleaveRecipe>(
        Lane0->getOperand(LoadGroupIdx)->getDefiningRecipe());

    auto *WideLoad = cast<VPWidenLoadRecipe>(
        Lane0->getOperand(WideLoadIdx)->getDefiningRecipe());

    // Narrow wide load to uniform scalar load, as transformed VPlan will only
    // process one original iteration.
    auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(),
                                    WideLoad->operands(), true);
    // Narrow interleave group to wide load, as transformed VPlan will only
    // process one original iteration.
    auto *L = new VPWidenLoadRecipe(
        *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
        LoadGroup->getAddr(), LoadGroup->getMask(), true, false,
        LoadGroup->getDebugLoc());
    L->insertBefore(LoadGroup);
    N->insertBefore(LoadGroup);
    Lane0->setOperand(LoadGroupIdx, L);
    Lane0->setOperand(WideLoadIdx, N);

    // Only lane 0's recipe feeds the new wide store; the recipes of the other
    // lanes lose their user here and are cleaned up by removeDeadRecipes.
    auto *S = new VPWidenStoreRecipe(
        *cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),
        StoreGroup->getAddr(), Lane0, nullptr, true, false,
        StoreGroup->getDebugLoc());
    S->insertBefore(StoreGroup);
    StoreGroup->eraseFromParent();
  }

  // Adjust induction to reflect that the transformed plan only processes one
  // original iteration.
  // NOTE(review): the step becomes 1 independent of the unroll factor; confirm
  // this is what is intended when the plan is executed with UF > 1.
  auto *CanIV = Plan.getCanonicalIV();
  VPInstruction *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
  Inc->setOperand(
      1, Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
  removeDeadRecipes(Plan);
  return true;
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ struct VPlanTransforms {
121121

122122
/// Remove dead recipes from \p Plan.
123123
static void removeDeadRecipes(VPlan &Plan);
124+
125+
static bool narrowInterleaveGroups(VPlan &Plan, ElementCount VF);
124126
};
125127

126128
} // namespace llvm

0 commit comments

Comments
 (0)