Skip to content

Commit f028ab3

Browse files
committed
!fixup address latest comments, thansk!
1 parent 9f53c00 commit f028ab3

File tree

5 files changed

+91
-105
lines changed

5 files changed

+91
-105
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 54 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -3290,17 +3290,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
32903290
if (EnableVPlanNativePath)
32913291
fixNonInductionPHIs(Plan, State);
32923292

3293-
// At this point every instruction in the original loop is widened to a
3294-
// vector form. Note that fixing reduction phis, as well as extracting the
3295-
// exit and resume values for fixed-order recurrences are already modeled in
3296-
// VPlan. All that remains to do here is to create a phi in the scalar
3297-
// pre-header for each fixed-order recurrence resume value.
3298-
// TODO: Also model creating phis in the scalar pre-header in VPlan.
3299-
for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) {
3300-
if (!Legal->isFixedOrderRecurrence(LO->getPhi()))
3301-
continue;
3302-
}
3303-
33043293
// Forget the original basic block.
33053294
PSE.getSE()->forgetLoop(OrigLoop);
33063295
PSE.getSE()->forgetBlockAndLoopDispositions();
@@ -3337,8 +3326,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
33373326
VectorLoop->getHeader(), Plan, State);
33383327
}
33393328

3340-
// Fix LCSSA phis not already fixed earlier. Extracts may need to be generated
3341-
// in the exit block, so update the builder.
33423329
State.Builder.SetInsertPoint(State.CFG.ExitBB,
33433330
State.CFG.ExitBB->getFirstNonPHIIt());
33443331
for (const auto &KV : Plan.getLiveOuts())
@@ -8485,9 +8472,59 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop,
84858472
Value *IncomingValue =
84868473
ExitPhi.getIncomingValueForBlock(ExitingBB);
84878474
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan);
8488-
Plan.addLiveOut(
8489-
&ExitPhi, V,
8490-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()));
8475+
Plan.addLiveOut(&ExitPhi, V);
8476+
}
8477+
}
8478+
8479+
/// Feed a resume value for every FOR from the vector loop to the scalar loop,
8480+
/// if middle block branches to scalar preheader, by introducing ExtractFromEnd
8481+
/// and ResumePhi recipes in each, respectively, and a VPLiveOut which uses the
8482+
/// latter and corresponds to the scalar header.
8483+
static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) {
8484+
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
8485+
8486+
// Start by finding out if middle block branches to scalar preheader.
8487+
// TODO: Should be replaced by
8488+
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8489+
// scalar region is modeled as well.
8490+
VPBasicBlock *ScalarPHVPBB = nullptr;
8491+
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
8492+
for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) {
8493+
if (isa<VPIRBasicBlock>(Succ))
8494+
continue;
8495+
assert(!ScalarPHVPBB && "Two candidates for ScalarPHVPBB?");
8496+
ScalarPHVPBB = cast<VPBasicBlock>(Succ);
8497+
}
8498+
if (!ScalarPHVPBB)
8499+
return;
8500+
8501+
for (auto &H : VectorRegion->getEntryBasicBlock()->phis()) {
8502+
auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&H);
8503+
if (!FOR)
8504+
continue;
8505+
8506+
VPBuilder B(ScalarPHVPBB);
8507+
VPBuilder MiddleBuilder(MiddleVPBB);
8508+
// Reset insert point so new recipes are inserted before terminator and
8509+
// condition, if there is either the former or both.
8510+
if (auto *Terminator = MiddleVPBB->getTerminator()) {
8511+
auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0));
8512+
assert((!Condition || Condition->getParent() == MiddleVPBB) &&
8513+
"Condition expected in MiddleVPBB");
8514+
MiddleBuilder.setInsertPoint(Condition ? Condition : Terminator);
8515+
}
8516+
8517+
// Extract the resume value and create a new VPLiveOut for it.
8518+
auto *Resume = MiddleBuilder.createNaryOp(
8519+
VPInstruction::ExtractFromEnd,
8520+
{FOR->getBackedgeValue(),
8521+
Plan.getOrAddLiveIn(
8522+
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1))},
8523+
{}, "vector.recur.extract");
8524+
auto *ResumePhiRecipe =
8525+
B.createNaryOp(VPInstruction::ResumePhi, {Resume, FOR->getStartValue()},
8526+
{}, "scalar.recur.init");
8527+
Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), ResumePhiRecipe);
84918528
}
84928529
}
84938530

@@ -8655,48 +8692,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
86558692
"VPBasicBlock");
86568693
RecipeBuilder.fixHeaderPhis();
86578694

8658-
auto *MiddleVPBB =
8659-
cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getSingleSuccessor());
8660-
8661-
VPBasicBlock *ScalarPH = nullptr;
8662-
for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) {
8663-
auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
8664-
if (VPBB && !isa<VPIRBasicBlock>(VPBB)) {
8665-
ScalarPH = VPBB;
8666-
break;
8667-
}
8668-
}
8669-
8670-
if (ScalarPH) {
8671-
for (auto &H : HeaderVPBB->phis()) {
8672-
auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&H);
8673-
if (!FOR)
8674-
continue;
8675-
VPBuilder B(ScalarPH);
8676-
VPBuilder MiddleBuilder;
8677-
// Set insert point so new recipes are inserted before terminator and
8678-
// condition, if there is either the former or both.
8679-
if (MiddleVPBB->getNumSuccessors() != 2)
8680-
MiddleBuilder.setInsertPoint(MiddleVPBB);
8681-
else if (isa<VPInstruction>(MiddleVPBB->getTerminator()->getOperand(0)))
8682-
MiddleBuilder.setInsertPoint(
8683-
&*std::prev(MiddleVPBB->getTerminator()->getIterator()));
8684-
else
8685-
MiddleBuilder.setInsertPoint(MiddleVPBB->getTerminator());
8686-
8687-
// Extract the resume value and create a new VPLiveOut for it.
8688-
auto *Resume = MiddleBuilder.createNaryOp(
8689-
VPInstruction::ExtractFromEnd,
8690-
{FOR->getBackedgeValue(),
8691-
Plan->getOrAddLiveIn(
8692-
ConstantInt::get(Plan->getCanonicalIV()->getScalarType(), 1))},
8693-
{}, "vector.recur.extract");
8694-
auto *R =
8695-
B.createNaryOp(VPInstruction::ExitPhi, {Resume, FOR->getStartValue()},
8696-
{}, "scalar.recur.init");
8697-
Plan->addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), R, ScalarPH);
8698-
}
8699-
}
8695+
addLiveOutsForFirstOrderRecurrences(*Plan);
87008696

87018697
// ---------------------------------------------------------------------------
87028698
// Transform initial VPlan: Apply previously taken decisions, in order, to

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -915,11 +915,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
915915
/// VPBB, if any, are rewired to the new VPIRBasicBlock.
916916
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
917917
VPIRBasicBlock *IRMiddleVPBB = new VPIRBasicBlock(IRBB);
918-
for (auto &[_, LO] : VPBB->getPlan()->getLiveOuts()) {
919-
if (LO->getPred() == VPBB)
920-
LO->setPred(IRMiddleVPBB);
921-
}
922-
923918
for (auto &R : make_early_inc_range(*VPBB))
924919
R.moveBefore(*IRMiddleVPBB, IRMiddleVPBB->end());
925920
VPBlockBase *PredVPBB = VPBB->getSinglePredecessor();
@@ -1135,9 +1130,9 @@ LLVM_DUMP_METHOD
11351130
void VPlan::dump() const { print(dbgs()); }
11361131
#endif
11371132

1138-
void VPlan::addLiveOut(PHINode *PN, VPValue *V, VPBasicBlock *Pred) {
1133+
void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
11391134
assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1140-
LiveOuts.insert({PN, new VPLiveOut(PN, V, Pred)});
1135+
LiveOuts.insert({PN, new VPLiveOut(PN, V)});
11411136
}
11421137

11431138
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
@@ -1206,18 +1201,9 @@ VPlan *VPlan::duplicate() {
12061201
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
12071202
remapOperands(Entry, NewEntry, Old2NewVPValues);
12081203

1209-
DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
1210-
VPBlockBase *OldMiddle = getVectorLoopRegion()->getSingleSuccessor();
1211-
VPBlockBase *NewMiddle = NewPlan->getVectorLoopRegion()->getSingleSuccessor();
1212-
Old2NewVPBlocks[OldMiddle] = NewMiddle;
1213-
for (const auto &[Old, New] :
1214-
zip(OldMiddle->getSuccessors(), NewMiddle->getSuccessors()))
1215-
Old2NewVPBlocks[Old] = New;
1216-
12171204
// Clone live-outs.
12181205
for (const auto &[_, LO] : LiveOuts)
1219-
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)],
1220-
cast<VPBasicBlock>(Old2NewVPBlocks[LO->getPred()]));
1206+
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
12211207

12221208
// Initialize remaining fields of cloned VPlan.
12231209
NewPlan->VFs = VFs;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -682,27 +682,23 @@ class VPBlockBase {
682682
};
683683

684684
/// A value that is used outside the VPlan. The operand of the user needs to be
685-
/// added to the associated LCSSA phi node.
685+
/// added to the associated LCSSA phi node. The incoming block from VPlan is
686+
/// determined by where the VPValue is defined: if it is defined by a recipe
687+
/// outside a region, its parent block is used, otherwise the middle block is
688+
/// used.
686689
class VPLiveOut : public VPUser {
687690
PHINode *Phi;
688691

689-
/// Predecessor in VPlan of this live-out. Used to as block to set the
690-
/// incoming value for.
691-
VPBasicBlock *Pred;
692-
693692
public:
694-
VPLiveOut(PHINode *Phi, VPValue *Op, VPBasicBlock *Pred)
695-
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi), Pred(Pred) {}
693+
VPLiveOut(PHINode *Phi, VPValue *Op)
694+
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
696695

697696
static inline bool classof(const VPUser *U) {
698697
return U->getVPUserID() == VPUser::VPUserID::LiveOut;
699698
}
700699

701-
/// Fixup the wrapped LCSSA phi node in the unique exit block. This simply
702-
/// means we need to add the appropriate incoming value from the middle
703-
/// block as exiting edges from the scalar epilogue loop (if present) are
704-
/// already in place, and we exit the vector loop exclusively to the middle
705-
/// block.
700+
/// Fixup the wrapped LCSSA phi node. This means we need to add the
701+
/// appropriate incoming value from the precessor Pred.
706702
void fixPhi(VPlan &Plan, VPTransformState &State);
707703

708704
/// Returns true if the VPLiveOut uses scalars of operand \p Op.
@@ -714,11 +710,6 @@ class VPLiveOut : public VPUser {
714710

715711
PHINode *getPhi() const { return Phi; }
716712

717-
/// Returns to incoming block for which to set the value.
718-
VPBasicBlock *getPred() const { return Pred; }
719-
720-
void setPred(VPBasicBlock *Pred) { this->Pred = Pred; }
721-
722713
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
723714
/// Print the VPLiveOut to \p O.
724715
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
@@ -1202,8 +1193,9 @@ class VPInstruction : public VPRecipeWithIRFlags {
12021193
ExplicitVectorLength,
12031194
/// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
12041195
/// The first operand is the incoming value from the predecessor in VPlan,
1205-
/// the second operand is the incoming value for all other predecessors.
1206-
ExitPhi,
1196+
/// the second operand is the incoming value for all other predecessors
1197+
/// (which are currently not modeled in VPlan).
1198+
ResumePhi,
12071199
CalculateTripCountMinusVF,
12081200
// Increment the canonical IV separately for each unrolled part.
12091201
CanonicalIVIncrementForPart,
@@ -3346,7 +3338,7 @@ class VPlan {
33463338
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
33473339
}
33483340

3349-
void addLiveOut(PHINode *PN, VPValue *V, VPBasicBlock *Pred);
3341+
void addLiveOut(PHINode *PN, VPValue *V);
33503342

33513343
void removeLiveOut(PHINode *PN) {
33523344
delete LiveOuts[PN];

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,13 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
193193
auto Lane = vputils::isUniformAfterVectorization(ExitValue)
194194
? VPLane::getFirstLane()
195195
: VPLane::getLastLaneForVF(State.VF);
196-
BasicBlock *PredBB = State.CFG.VPBB2IRBB[Pred];
196+
VPBasicBlock *PredVPBB =
197+
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
198+
VPRecipeBase *DefRecipe = ExitValue->getDefiningRecipe();
199+
if (DefRecipe && !DefRecipe->getParent()->getParent())
200+
PredVPBB = DefRecipe->getParent();
201+
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
202+
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
197203
Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane));
198204
if (Phi->getBasicBlockIndex(PredBB) != -1)
199205
Phi->setIncomingValueForBlock(PredBB, V);
@@ -304,7 +310,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
304310
case VPInstruction::CanonicalIVIncrementForPart:
305311
case VPInstruction::PtrAdd:
306312
case VPInstruction::ExplicitVectorLength:
307-
case VPInstruction::ExitPhi:
313+
case VPInstruction::ResumePhi:
308314
return true;
309315
default:
310316
return false;
@@ -595,20 +601,24 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
595601
Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
596602
return Builder.CreatePtrAdd(Ptr, Addend, Name);
597603
}
598-
case VPInstruction::ExitPhi: {
604+
case VPInstruction::ResumePhi: {
599605
if (Part != 0)
600606
return State.get(this, 0, /*IsScalar*/ true);
601607
Value *IncomingFromVPlanPred =
602608
State.get(getOperand(0), Part, /* IsScalar */ true);
603-
Value *IncomingFromOtherPred =
609+
Value *IncomingFromOtherPreds =
604610
State.get(getOperand(1), Part, /* IsScalar */ true);
605-
auto *NewPhi = Builder.CreatePHI(IncomingFromOtherPred->getType(), 2, Name);
611+
auto *NewPhi =
612+
Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name);
606613
BasicBlock *VPlanPred =
607614
State.CFG
608615
.VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
609616
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
610-
for (auto *BB : predecessors(Builder.GetInsertBlock()))
611-
NewPhi->addIncoming(IncomingFromOtherPred, BB);
617+
for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
618+
assert(OtherPred != VPlanPred &&
619+
"VPlan predecessors should not be connected yet");
620+
NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
621+
}
612622
return NewPhi;
613623
}
614624

@@ -619,7 +629,7 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
619629

620630
bool VPInstruction::isVectorToScalar() const {
621631
return getOpcode() == VPInstruction::ExtractFromEnd ||
622-
getOpcode() == VPInstruction::ExitPhi ||
632+
getOpcode() == VPInstruction::ResumePhi ||
623633
getOpcode() == VPInstruction::ComputeReductionResult;
624634
}
625635

@@ -749,7 +759,7 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
749759
case VPInstruction::ActiveLaneMask:
750760
O << "active lane mask";
751761
break;
752-
case VPInstruction::ExitPhi:
762+
case VPInstruction::ResumePhi:
753763
O << "exit-phi";
754764
break;
755765
case VPInstruction::ExplicitVectorLength:

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -937,7 +937,9 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
937937
Type *IntTy = Plan.getCanonicalIV()->getScalarType();
938938

939939
// Extract the penultimate value of the recurrence and update VPLiveOut
940-
// users of the recurrence splice.
940+
// users of the recurrence splice. Note that the extract of the final value
941+
// used to resume in the scalar loop is created earlier during VPlan
942+
// construction.
941943
auto *Penultimate = cast<VPInstruction>(MiddleBuilder.createNaryOp(
942944
VPInstruction::ExtractFromEnd,
943945
{FOR->getBackedgeValue(),

0 commit comments

Comments
 (0)