Skip to content

Commit 99741ac

Browse files
authored
[VPlan] Introduce explicit ExtractFromEnd recipes for live-outs. (#100658)
Introduce explicit ExtractFromEnd recipes to extract the final values for live-outs instead of implicitly extracting in VPLiveOut::fixPhi. This is a follow-up to the recent changes of modeling extracts for recurrences and consolidates live-out extract creation for fixed-order recurrences at a single place: addLiveOutsForFirstOrderRecurrences. It is also in preparation for replacing VPLiveOut with VPIRInstructions wrapping the original scalar phis. PR: #100658
1 parent 25ffd2e commit 99741ac

13 files changed

+224
-169
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 174 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -8527,9 +8527,11 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
85278527
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
85288528
}
85298529

8530-
// Add exit values to \p Plan. VPLiveOuts are added for each LCSSA phi in the
8531-
// original exit block.
8532-
static void addUsersInExitBlock(
8530+
// Collect (ExitPhi, ExitingValue) pairs for phis in the original exit block that
8531+
// are modeled in VPlan. Some exiting values are not modeled explicitly yet and
8532+
// won't be included. Those are un-truncated VPWidenIntOrFpInductionRecipe,
8533+
// VPWidenPointerInductionRecipe and induction increments.
8534+
static MapVector<PHINode *, VPValue *> collectUsersInExitBlock(
85338535
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
85348536
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
85358537
auto MiddleVPBB =
@@ -8538,9 +8540,8 @@ static void addUsersInExitBlock(
85388540
// and there is nothing to fix from vector loop; phis should have incoming
85398541
// from scalar loop only.
85408542
if (MiddleVPBB->getNumSuccessors() != 2)
8541-
return;
8542-
8543-
// Introduce VPUsers modeling the exit values.
8543+
return {};
8544+
MapVector<PHINode *, VPValue *> ExitingValuesToFix;
85448545
BasicBlock *ExitBB =
85458546
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock();
85468547
BasicBlock *ExitingBB = OrigLoop->getExitingBlock();
@@ -8561,15 +8562,52 @@ static void addUsersInExitBlock(
85618562
return P && Inductions.contains(P);
85628563
})))
85638564
continue;
8564-
Plan.addLiveOut(&ExitPhi, V);
8565+
ExitingValuesToFix.insert({&ExitPhi, V});
85658566
}
8567+
return ExitingValuesToFix;
85668568
}
85678569

8568-
/// Feed a resume value for every FOR from the vector loop to the scalar loop,
8569-
/// if middle block branches to scalar preheader, by introducing ExtractFromEnd
8570-
/// and ResumePhi recipes in each, respectively, and a VPLiveOut which uses the
8571-
/// latter and corresponds to the scalar header.
8572-
static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) {
8570+
// Add exit values to \p Plan. Extracts and VPLiveOuts are added for each entry
8571+
// in \p ExitingValuesToFix.
8572+
static void
8573+
addUsersInExitBlock(VPlan &Plan,
8574+
MapVector<PHINode *, VPValue *> &ExitingValuesToFix) {
8575+
if (ExitingValuesToFix.empty())
8576+
return;
8577+
8578+
auto MiddleVPBB =
8579+
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
8580+
BasicBlock *ExitBB =
8581+
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock();
8582+
// TODO: set B to MiddleVPBB->getFirstNonPhi(), taking care of affected tests.
8583+
VPBuilder B(MiddleVPBB);
8584+
if (auto *Terminator = MiddleVPBB->getTerminator()) {
8585+
auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0));
8586+
assert((!Condition || Condition->getParent() == MiddleVPBB) &&
8587+
"Condition expected in MiddleVPBB");
8588+
B.setInsertPoint(Condition ? Condition : Terminator);
8589+
}
8590+
8591+
// Introduce VPUsers modeling the exit values.
8592+
for (const auto &[ExitPhi, V] : ExitingValuesToFix) {
8593+
VPValue *Ext = B.createNaryOp(
8594+
VPInstruction::ExtractFromEnd,
8595+
{V, Plan.getOrAddLiveIn(ConstantInt::get(
8596+
IntegerType::get(ExitBB->getContext(), 32), 1))});
8597+
Plan.addLiveOut(ExitPhi, Ext);
8598+
}
8599+
}
8600+
8601+
/// Handle live-outs for first-order recurrences, both in the scalar preheader
8602+
/// and the original exit block:
8603+
/// 1. Feed a resume value for every FOR from the vector loop to the scalar
8604+
/// loop, if middle block branches to scalar preheader, by introducing
8605+
/// ExtractFromEnd and ResumePhi recipes in each, respectively, and a
8606+
/// VPLiveOut which uses the latter and corresponds to the scalar header.
8607+
/// 2. Feed the penultimate value of recurrences to their LCSSA phi users in
8608+
/// the original exit block using a VPLiveOut.
8609+
static void addLiveOutsForFirstOrderRecurrences(
8610+
VPlan &Plan, MapVector<PHINode *, VPValue *> &ExitingValuesToFix) {
85738611
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
85748612

85758613
// Start by finding out if middle block branches to scalar preheader, which is
@@ -8578,21 +8616,31 @@ static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) {
85788616
// TODO: Should be replaced by
85798617
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
85808618
// scalar region is modeled as well.
8581-
VPBasicBlock *ScalarPHVPBB = nullptr;
85828619
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
8583-
for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) {
8584-
if (isa<VPIRBasicBlock>(Succ))
8585-
continue;
8586-
assert(!ScalarPHVPBB && "Two candidates for ScalarPHVPBB?");
8587-
ScalarPHVPBB = cast<VPBasicBlock>(Succ);
8620+
BasicBlock *ExitBB = nullptr;
8621+
VPBasicBlock *ScalarPHVPBB = nullptr;
8622+
if (MiddleVPBB->getNumSuccessors() == 2) {
8623+
// Order is strict: first is the exit block, second is the scalar preheader.
8624+
ExitBB =
8625+
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock();
8626+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
8627+
} else if (ExitingValuesToFix.empty()) {
8628+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
8629+
} else {
8630+
ExitBB = cast<VPIRBasicBlock>(MiddleVPBB->getSingleSuccessor())
8631+
->getIRBasicBlock();
85888632
}
8589-
if (!ScalarPHVPBB)
8633+
if (!ScalarPHVPBB) {
8634+
assert(ExitingValuesToFix.empty() &&
8635+
"missed inserting extracts for exiting values");
85908636
return;
8637+
}
85918638

85928639
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
85938640
VPBuilder MiddleBuilder(MiddleVPBB);
85948641
// Reset insert point so new recipes are inserted before terminator and
85958642
// condition, if there is either the former or both.
8643+
// TODO: set MiddleBuilder to MiddleVPBB->getFirstNonPhi().
85968644
if (auto *Terminator = MiddleVPBB->getTerminator()) {
85978645
auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0));
85988646
assert((!Condition || Condition->getParent() == MiddleVPBB) &&
@@ -8601,20 +8649,110 @@ static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) {
86018649
}
86028650
VPValue *OneVPV = Plan.getOrAddLiveIn(
86038651
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
8652+
VPValue *TwoVPV = Plan.getOrAddLiveIn(
8653+
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2));
86048654

86058655
for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) {
86068656
auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi);
86078657
if (!FOR)
86088658
continue;
86098659

8660+
// This is the second phase of vectorizing first-order recurrences, creating
8661+
// extract for users outside the loop. An overview of the transformation is
8662+
// described below. Suppose we have the following loop with some use after
8663+
// the loop of the last a[i-1],
8664+
//
8665+
// for (int i = 0; i < n; ++i) {
8666+
// t = a[i - 1];
8667+
// b[i] = a[i] - t;
8668+
// }
8669+
// use t;
8670+
//
8671+
// There is a first-order recurrence on "a". For this loop, the shorthand
8672+
// scalar IR looks like:
8673+
//
8674+
// scalar.ph:
8675+
// s.init = a[-1]
8676+
// br scalar.body
8677+
//
8678+
// scalar.body:
8679+
// i = phi [0, scalar.ph], [i+1, scalar.body]
8680+
// s1 = phi [s.init, scalar.ph], [s2, scalar.body]
8681+
// s2 = a[i]
8682+
// b[i] = s2 - s1
8683+
// br cond, scalar.body, exit.block
8684+
//
8685+
// exit.block:
8686+
// use = lcssa.phi [s1, scalar.body]
8687+
//
8688+
// In this example, s1 is a recurrence because its value depends on the
8689+
// previous iteration. In the first phase of vectorization, we created a
8690+
// VPFirstOrderRecurrencePHIRecipe v1 for s1. Now we create the extracts
8691+
// for users in the scalar preheader and exit block.
8692+
//
8693+
// vector.ph:
8694+
// v_init = vector(..., ..., ..., a[-1])
8695+
// br vector.body
8696+
//
8697+
// vector.body:
8698+
// i = phi [0, vector.ph], [i+4, vector.body]
8699+
// v1 = phi [v_init, vector.ph], [v2, vector.body]
8700+
// v2 = a[i, i+1, i+2, i+3]
8701+
// b[i] = v2 - v1
8702+
// // Next, third phase will introduce v1' = splice(v1(3), v2(0, 1, 2))
8703+
// b[i, i+1, i+2, i+3] = v2 - v1
8704+
// br cond, vector.body, middle.block
8705+
//
8706+
// middle.block:
8707+
// vector.recur.extract.for.phi = v2(2)
8708+
// vector.recur.extract = v2(3)
8709+
// br cond, scalar.ph, exit.block
8710+
//
8711+
// scalar.ph:
8712+
// scalar.recur.init = phi [vector.recur.extract, middle.block],
8713+
// [s.init, otherwise]
8714+
// br scalar.body
8715+
//
8716+
// scalar.body:
8717+
// i = phi [0, scalar.ph], [i+1, scalar.body]
8718+
// s1 = phi [scalar.recur.init, scalar.ph], [s2, scalar.body]
8719+
// s2 = a[i]
8720+
// b[i] = s2 - s1
8721+
// br cond, scalar.body, exit.block
8722+
//
8723+
// exit.block:
8724+
// lo = lcssa.phi [s1, scalar.body],
8725+
// [vector.recur.extract.for.phi, middle.block]
8726+
//
86108727
// Extract the resume value and create a new VPLiveOut for it.
86118728
auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd,
86128729
{FOR->getBackedgeValue(), OneVPV},
86138730
{}, "vector.recur.extract");
86148731
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
86158732
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
86168733
"scalar.recur.init");
8617-
Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), ResumePhiRecipe);
8734+
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
8735+
Plan.addLiveOut(FORPhi, ResumePhiRecipe);
8736+
8737+
// Now create VPLiveOuts for users in the exit block.
8738+
// Extract the penultimate value of the recurrence and add VPLiveOut
8739+
// users of the recurrence splice.
8740+
8741+
// No edge from the middle block to the unique exit block has been inserted
8742+
// and there is nothing to fix from vector loop; phis should have incoming
8743+
// from scalar loop only.
8744+
if (ExitingValuesToFix.empty())
8745+
continue;
8746+
for (User *U : FORPhi->users()) {
8747+
auto *UI = cast<Instruction>(U);
8748+
if (UI->getParent() != ExitBB)
8749+
continue;
8750+
VPValue *Ext = MiddleBuilder.createNaryOp(
8751+
VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {},
8752+
"vector.recur.extract.for.phi");
8753+
Plan.addLiveOut(cast<PHINode>(UI), Ext);
8754+
ExitingValuesToFix.erase(cast<PHINode>(UI));
8755+
}
86188756
}
86198757
}
86208758

@@ -8769,16 +8907,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
87698907
// After here, VPBB should not be used.
87708908
VPBB = nullptr;
87718909

8772-
addUsersInExitBlock(OrigLoop, RecipeBuilder, *Plan,
8773-
Legal->getInductionVars());
8774-
87758910
assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
87768911
!Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
87778912
"entry block must be set to a VPRegionBlock having a non-empty entry "
87788913
"VPBasicBlock");
87798914
RecipeBuilder.fixHeaderPhis();
87808915

8781-
addLiveOutsForFirstOrderRecurrences(*Plan);
8916+
MapVector<PHINode *, VPValue *> ExitingValuesToFix = collectUsersInExitBlock(
8917+
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
8918+
8919+
addLiveOutsForFirstOrderRecurrences(*Plan, ExitingValuesToFix);
8920+
addUsersInExitBlock(*Plan, ExitingValuesToFix);
87828921

87838922
// ---------------------------------------------------------------------------
87848923
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -8931,6 +9070,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
89319070
// iteration. The final value is selected by the final ComputeReductionResult.
89329071
void LoopVectorizationPlanner::adjustRecipesForReductions(
89339072
VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) {
9073+
using namespace VPlanPatternMatch;
89349074
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion();
89359075
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock();
89369076
// Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
@@ -8988,10 +9128,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
89889128
for (unsigned I = 0; I != Worklist.size(); ++I) {
89899129
VPSingleDefRecipe *Cur = Worklist[I];
89909130
for (VPUser *U : Cur->users()) {
8991-
auto *UserRecipe = dyn_cast<VPSingleDefRecipe>(U);
8992-
if (!UserRecipe) {
8993-
assert(isa<VPLiveOut>(U) &&
8994-
"U must either be a VPSingleDef or VPLiveOut");
9131+
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
9132+
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
9133+
assert(match(U, m_Binary<VPInstruction::ExtractFromEnd>(
9134+
m_VPValue(), m_VPValue())) &&
9135+
"U must be an ExtractFromEnd VPInstruction");
89959136
continue;
89969137
}
89979138
Worklist.insert(UserRecipe);
@@ -9208,9 +9349,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
92089349
auto *FinalReductionResult = new VPInstruction(
92099350
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
92109351
FinalReductionResult->insertBefore(*MiddleVPBB, IP);
9211-
OrigExitingVPV->replaceUsesWithIf(
9212-
FinalReductionResult,
9213-
[](VPUser &User, unsigned) { return isa<VPLiveOut>(&User); });
9352+
OrigExitingVPV->replaceUsesWithIf(FinalReductionResult, [](VPUser &User,
9353+
unsigned) {
9354+
return match(&User, m_Binary<VPInstruction::ExtractFromEnd>(m_VPValue(),
9355+
m_VPValue()));
9356+
});
92149357
}
92159358

92169359
VPlanTransforms::clearReductionWrapFlags(*Plan);

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) {
2929
return const_cast<Pattern &>(P).match(V);
3030
}
3131

32+
template <typename Pattern> bool match(VPUser *U, const Pattern &P) {
33+
auto *R = dyn_cast<VPRecipeBase>(U);
34+
return R && match(R, P);
35+
}
36+
3237
template <typename Class> struct class_match {
3338
template <typename ITy> bool match(ITy *V) { return isa<Class>(V); }
3439
};

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
194194

195195
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
196196
VPValue *ExitValue = getOperand(0);
197-
auto Lane = vputils::isUniformAfterVectorization(ExitValue)
198-
? VPLane::getFirstLane()
199-
: VPLane::getLastLaneForVF(State.VF);
200197
VPBasicBlock *MiddleVPBB =
201198
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
202199
VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
@@ -207,10 +204,7 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
207204
? MiddleVPBB
208205
: ExitingVPBB;
209206
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
210-
// Set insertion point in PredBB in case an extract needs to be generated.
211-
// TODO: Model extracts explicitly.
212-
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
213-
Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane));
207+
Value *V = State.get(ExitValue, VPIteration(0, 0));
214208
if (Phi->getBasicBlockIndex(PredBB) != -1)
215209
Phi->setIncomingValueForBlock(PredBB, V);
216210
else

0 commit comments

Comments
 (0)