Skip to content

Commit 39f0f73

Browse files
committed
[VPlan] Introduce scalar loop header in plan, remove VPLiveOut.
Update VPlan to include the scalar loop header. This allows retiring VPLiveOut, as the remaining live-outs can now be handled by adding operands to the wrapped phis in the scalar loop header. Note that the current version only includes the scalar loop header: it does not model any other blocks of the scalar loop, and it does not wrap the header in a region block. Either of these can be added in this PR or in follow-ups as needed.
1 parent fe06a6d commit 39f0f73

File tree

8 files changed

+52
-132
lines changed

8 files changed

+52
-132
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2956,10 +2956,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29562956
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
29572957
}
29582958

2959-
// Fix live-out phis not already fixed earlier.
2960-
for (const auto &KV : Plan.getLiveOuts())
2961-
KV.second->fixPhi(Plan, State);
2962-
29632959
for (Instruction *PI : PredicatedInstructions)
29642960
sinkScalarOperands(&*PI);
29652961

@@ -8816,7 +8812,14 @@ static void addLiveOutsForFirstOrderRecurrences(
88168812
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
88178813
"scalar.recur.init");
88188814
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
8819-
Plan.addLiveOut(FORPhi, ResumePhiRecipe);
8815+
for (VPRecipeBase &R :
8816+
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
8817+
auto *IRI = cast<VPIRInstruction>(&R);
8818+
if (&IRI->getInstruction() == FORPhi) {
8819+
IRI->addOperand(ResumePhiRecipe);
8820+
break;
8821+
}
8822+
}
88208823

88218824
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
88228825
// Extract the penultimate value of the recurrence and use it as operand for

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -457,10 +457,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
457457
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
458458
executeRecipes(State, getIRBasicBlock());
459459
if (getSingleSuccessor()) {
460-
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
461-
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
462-
Br->setOperand(0, nullptr);
463-
getIRBasicBlock()->getTerminator()->eraseFromParent();
460+
auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
461+
if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
462+
getIRBasicBlock()->getSingleSuccessor()) {
463+
cast<BranchInst>(getIRBasicBlock()->getTerminator())
464+
->setOperand(0, nullptr);
465+
} else {
466+
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
467+
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
468+
Br->setOperand(0, nullptr);
469+
getIRBasicBlock()->getTerminator()->eraseFromParent();
470+
}
464471
}
465472

466473
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -844,10 +851,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
844851
#endif
845852

846853
VPlan::~VPlan() {
847-
for (auto &KV : LiveOuts)
848-
delete KV.second;
849-
LiveOuts.clear();
850-
851854
if (Entry) {
852855
VPValue DummyValue;
853856
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -902,6 +905,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
902905
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
903906

904907
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
908+
VPBasicBlock *ScalarHeader = createVPIRBasicBlockFor(TheLoop->getHeader());
909+
VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
905910
if (!RequiresScalarEpilogueCheck) {
906911
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
907912
return Plan;
@@ -1051,6 +1056,8 @@ void VPlan::execute(VPTransformState *State) {
10511056
BrInst->insertBefore(MiddleBB->getTerminator());
10521057
MiddleBB->getTerminator()->eraseFromParent();
10531058
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
1059+
State->CFG.DTU.applyUpdates(
1060+
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
10541061

10551062
// Generate code in the loop pre-header and body.
10561063
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1169,12 +1176,6 @@ void VPlan::print(raw_ostream &O) const {
11691176
Block->print(O, "", SlotTracker);
11701177
}
11711178

1172-
if (!LiveOuts.empty())
1173-
O << "\n";
1174-
for (const auto &KV : LiveOuts) {
1175-
KV.second->print(O, SlotTracker);
1176-
}
1177-
11781179
O << "}\n";
11791180
}
11801181

@@ -1211,11 +1212,6 @@ LLVM_DUMP_METHOD
12111212
void VPlan::dump() const { print(dbgs()); }
12121213
#endif
12131214

1214-
void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1215-
assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1216-
LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1217-
}
1218-
12191215
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12201216
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
12211217
// Update the operands of all cloned recipes starting at NewEntry. This
@@ -1283,10 +1279,6 @@ VPlan *VPlan::duplicate() {
12831279
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
12841280
remapOperands(Entry, NewEntry, Old2NewVPValues);
12851281

1286-
// Clone live-outs.
1287-
for (const auto &[_, LO] : LiveOuts)
1288-
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1289-
12901282
// Initialize remaining fields of cloned VPlan.
12911283
NewPlan->VFs = VFs;
12921284
NewPlan->UFs = UFs;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -671,48 +671,6 @@ class VPBlockBase {
671671
virtual VPBlockBase *clone() = 0;
672672
};
673673

674-
/// A value that is used outside the VPlan. The operand of the user needs to be
675-
/// added to the associated phi node. The incoming block from VPlan is
676-
/// determined by where the VPValue is defined: if it is defined by a recipe
677-
/// outside a region, its parent block is used, otherwise the middle block is
678-
/// used.
679-
class VPLiveOut : public VPUser {
680-
PHINode *Phi;
681-
682-
public:
683-
VPLiveOut(PHINode *Phi, VPValue *Op)
684-
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
685-
686-
static inline bool classof(const VPUser *U) {
687-
return U->getVPUserID() == VPUser::VPUserID::LiveOut;
688-
}
689-
690-
/// Fix the wrapped phi node. This means adding an incoming value to exit
691-
/// block phi's from the vector loop via middle block (values from scalar loop
692-
/// already reach these phi's), and updating the value to scalar header phi's
693-
/// from the scalar preheader.
694-
void fixPhi(VPlan &Plan, VPTransformState &State);
695-
696-
/// Returns true if the VPLiveOut uses scalars of operand \p Op.
697-
bool usesScalars(const VPValue *Op) const override {
698-
assert(is_contained(operands(), Op) &&
699-
"Op must be an operand of the recipe");
700-
return true;
701-
}
702-
703-
PHINode *getPhi() const { return Phi; }
704-
705-
/// Live-outs are marked as only using the first part during the transition
706-
/// to unrolling directly on VPlan.
707-
/// TODO: Remove after unroller transition.
708-
bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }
709-
710-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
711-
/// Print the VPLiveOut to \p O.
712-
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
713-
#endif
714-
};
715-
716674
/// Struct to hold various analysis needed for cost computations.
717675
struct VPCostContext {
718676
const TargetTransformInfo &TTI;
@@ -3454,11 +3412,6 @@ class VPlan {
34543412
/// definitions are VPValues that hold a pointer to their underlying IR.
34553413
SmallVector<VPValue *, 16> VPLiveInsToFree;
34563414

3457-
/// Values used outside the plan. It contains live-outs that need fixing. Any
3458-
/// live-out that is fixed outside VPlan needs to be removed. The remaining
3459-
/// live-outs are fixed via VPLiveOut::fixPhi.
3460-
MapVector<PHINode *, VPLiveOut *> LiveOuts;
3461-
34623415
/// Mapping from SCEVs to the VPValues representing their expansions.
34633416
/// NOTE: This mapping is temporary and will be removed once all users have
34643417
/// been modeled in VPlan directly.
@@ -3638,12 +3591,6 @@ class VPlan {
36383591
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
36393592
}
36403593

3641-
void addLiveOut(PHINode *PN, VPValue *V);
3642-
3643-
const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
3644-
return LiveOuts;
3645-
}
3646-
36473594
VPValue *getSCEVExpansion(const SCEV *S) const {
36483595
return SCEVToExpansion.lookup(S);
36493596
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -197,35 +197,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
197197
}
198198
}
199199

200-
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
201-
VPValue *ExitValue = getOperand(0);
202-
VPBasicBlock *MiddleVPBB =
203-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
204-
VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
205-
auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
206-
// Values leaving the vector loop reach live out phi's in the exiting block
207-
// via middle block.
208-
auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
209-
? MiddleVPBB
210-
: ExitingVPBB;
211-
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
212-
Value *V = State.get(ExitValue, VPIteration(0, 0));
213-
if (Phi->getBasicBlockIndex(PredBB) != -1)
214-
Phi->setIncomingValueForBlock(PredBB, V);
215-
else
216-
Phi->addIncoming(V, PredBB);
217-
}
218-
219-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
220-
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
221-
O << "Live-out ";
222-
getPhi()->printAsOperand(O);
223-
O << " = ";
224-
getOperand(0)->printAsOperand(O, SlotTracker);
225-
O << "\n";
226-
}
227-
#endif
228-
229200
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
230201
assert(!Parent && "Recipe already in some VPBasicBlock");
231202
assert(InsertPos->getParent() &&
@@ -867,7 +838,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
867838
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
868839
Value *V = State.get(ExitValue, VPIteration(0, Lane));
869840
auto *Phi = cast<PHINode>(&I);
870-
Phi->addIncoming(V, PredBB);
841+
if (Phi->getBasicBlockIndex(PredBB) == -1)
842+
Phi->addIncoming(V, PredBB);
843+
else
844+
Phi->setIncomingValueForBlock(PredBB, V);
871845
}
872846

873847
// Advance the insert point after the wrapped IR instruction. This allows

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
378378
// Don't fold the exit block of the Plan into its single predecessor for
379379
// now.
380380
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
381-
if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
381+
if (!VPBB->getParent())
382382
continue;
383383
auto *PredVPBB =
384384
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
265265

266266
if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
267267
VPValue *Op0, *Op1;
268+
268269
if (match(VPI, m_VPInstruction<VPInstruction::ExtractFromEnd>(
269270
m_VPValue(Op0), m_VPValue(Op1)))) {
270271
VPI->setOperand(1, getValueForPart(Op1, UF - 1));
@@ -281,6 +282,11 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
281282
}
282283
return;
283284
}
285+
if (match(VPI, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(Op0),
286+
m_VPValue(Op1)))) {
287+
addUniformForAllParts(VPI);
288+
return;
289+
}
284290

285291
if (vputils::onlyFirstPartUsed(VPI)) {
286292
addUniformForAllParts(VPI);
@@ -467,11 +473,5 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
467473
Part++;
468474
}
469475

470-
// Remap the operand of live-outs to the last part.
471-
for (const auto &[_, LO] : Plan.getLiveOuts()) {
472-
VPValue *In = Unroller.getValueForPart(LO->getOperand(0), UF - 1);
473-
LO->setOperand(0, In);
474-
}
475-
476476
VPlanTransforms::removeDeadRecipes(Plan);
477477
}

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -244,14 +244,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
244244
return false;
245245
}
246246

247-
VPBlockBase *MiddleBB =
248-
IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor();
249-
if (IRBB != IRBB->getPlan()->getPreheader() &&
250-
IRBB->getSinglePredecessor() != MiddleBB) {
251-
errs() << "VPIRBasicBlock can only be used as pre-header or a successor of "
252-
"middle-block at the moment!\n";
253-
return false;
254-
}
255247
return true;
256248
}
257249

@@ -416,12 +408,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
416408
return false;
417409
}
418410

419-
for (const auto &KV : Plan.getLiveOuts())
420-
if (KV.second->getNumOperands() != 1) {
421-
errs() << "live outs must have a single operand\n";
422-
return false;
423-
}
424-
425411
return true;
426412
}
427413

llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,6 +1077,17 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
10771077
; CHECK-NEXT: No successors
10781078
; CHECK-EMPTY:
10791079
; CHECK-NEXT: scalar.ph
1080+
; CHECK-NEXT: Successor(s): ir-bb<loop>
1081+
; CHECK-EMPTY:
1082+
; CHECK-NEXT: ir-bb<loop>:
1083+
; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ]
1084+
; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, -1
1085+
; CHECK-NEXT: IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
1086+
; CHECK-NEXT: IR %l = load i32, ptr %gep.src, align 16
1087+
; CHECK-NEXT: IR %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1
1088+
; CHECK-NEXT: IR %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
1089+
; CHECK-NEXT: IR store i32 %l, ptr %gep.dst, align 16
1090+
; CHECK-NEXT: IR %ec = icmp eq i32 %iv.next, 0
10801091
; CHECK-NEXT: No successors
10811092
; CHECK-NEXT: }
10821093
;
@@ -1156,6 +1167,13 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
11561167
; CHECK-NEXT: No successors
11571168
; CHECK-EMPTY:
11581169
; CHECK-NEXT: scalar.ph:
1170+
; CHECK-NEXT: Successor(s): ir-bb<loop.header>
1171+
; CHECK-EMPTY:
1172+
; CHECK-NEXT: ir-bb<loop.header>:
1173+
; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
1174+
; CHECK-NEXT: IR %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1
1175+
; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv.next, align 1
1176+
; CHECK-NEXT: IR %c.1 = icmp eq i8 %l, 0
11591177
; CHECK-NEXT: No successors
11601178
; CHECK-NEXT: }
11611179
;

0 commit comments

Comments
 (0)