Skip to content

Commit 1b89761

Browse files
committed
[VPlan] Hook IR blocks into VPlan during skeleton creation (NFC)
As a first step towards modeling the full skeleton in VPlan, start by wrapping the IR blocks created during legacy skeleton creation in VPIRBasicBlocks and hooking them into the VPlan. This means the skeleton CFG is represented in VPlan just before execution, which allows parts of skeleton creation to be moved gradually into recipes in the VPBBs. Note that this also allows retiring some manual DT updates, as these will be handled automatically during VPlan execution.
1 parent 45ae7d1 commit 1b89761

File tree

11 files changed

+167
-123
lines changed

11 files changed

+167
-123
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 53 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
24262426
return VectorTripCount;
24272427
}
24282428

2429+
/// Hook the scalar preheader of \p Plan into the VPlan CFG next to the vector
/// preheader. The edge from the plan's entry block to the vector preheader is
/// removed; the vector preheader's former single predecessor is then connected
/// to the scalar preheader first and to the vector preheader second.
// NOTE(review): the edge is removed via Plan.getEntry() but re-created from
// PredVPB; this presumably relies on both being the same block - confirm.
static void connectScalarPreheaderInVPlan(VPlan &Plan) {
2430+
VPBlockBase *VectorPH = Plan.getVectorPreheader();
2431+
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
2432+
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
2433+
VPBlockUtils::disconnectBlocks(Plan.getEntry(), VectorPH);
2434+
VPBlockUtils::connectBlocks(PredVPB, ScalarPH);
2435+
VPBlockUtils::connectBlocks(PredVPB, VectorPH);
2436+
}
2437+
2438+
/// Insert the IR check block \p CheckIRBB into the CFG of \p Plan. The block
/// is wrapped in a VPIRBasicBlock and placed between the vector preheader and
/// its current single predecessor; the new block is then connected to the
/// scalar preheader first and to the vector preheader second.
static void connectCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) {
2439+
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
2440+
VPBlockBase *VectorPH = Plan.getVectorPreheader();
2441+
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
2442+
VPBlockUtils::disconnectBlocks(PredVPB, VectorPH);
2443+
VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB);
2444+
VPBlockUtils::connectBlocks(PredVPB, CheckVPIRBB);
2445+
VPBlockUtils::connectBlocks(CheckVPIRBB, ScalarPH);
2446+
VPBlockUtils::connectBlocks(CheckVPIRBB, VectorPH);
2447+
}
2448+
24292449
void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
24302450
Value *Count = getTripCount();
24312451
// Reuse existing vector loop preheader for TC checks.
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
25112531
"TC check is expected to dominate Bypass");
25122532

25132533
// Update dominator for Bypass & LoopExit (if needed).
2514-
DT->changeImmediateDominator(Bypass, TCCheckBlock);
25152534
BranchInst &BI =
25162535
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
25172536
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
25182537
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
25192538
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
25202539
LoopBypassBlocks.push_back(TCCheckBlock);
2540+
2541+
connectScalarPreheaderInVPlan(Plan);
25212542
}
25222543

25232544
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2534,6 +2555,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
25342555
"Should already be a bypass block due to iteration count check");
25352556
LoopBypassBlocks.push_back(SCEVCheckBlock);
25362557
AddedSafetyChecks = true;
2558+
2559+
connectCheckBlockInVPlan(Plan, SCEVCheckBlock);
25372560
return SCEVCheckBlock;
25382561
}
25392562

@@ -2570,6 +2593,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
25702593

25712594
AddedSafetyChecks = true;
25722595

2596+
connectCheckBlockInVPlan(Plan, MemCheckBlock);
25732597
return MemCheckBlock;
25742598
}
25752599

@@ -7648,10 +7672,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76487672

76497673
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
76507674
// before making any changes to the CFG.
7651-
if (!BestVPlan.getPreheader()->empty()) {
7675+
if (!BestVPlan.getEntry()->empty()) {
76527676
State.CFG.PrevBB = OrigLoop->getLoopPreheader();
76537677
State.Builder.SetInsertPoint(OrigLoop->getLoopPreheader()->getTerminator());
7654-
BestVPlan.getPreheader()->execute(&State);
7678+
BestVPlan.getEntry()->execute(&State);
76557679
}
76567680
if (!ILV.getTripCount())
76577681
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
@@ -7859,8 +7883,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
78597883
DT->getNode(Bypass)->getIDom()) &&
78607884
"TC check is expected to dominate Bypass");
78617885

7862-
// Update dominator for Bypass.
7863-
DT->changeImmediateDominator(Bypass, TCCheckBlock);
78647886
LoopBypassBlocks.push_back(TCCheckBlock);
78657887

78667888
// Save the trip count so we don't have to regenerate it in the
@@ -7875,6 +7897,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
78757897
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
78767898
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
78777899

7900+
VPBlockBase *VectorPH = Plan.getVectorPreheader();
7901+
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
7902+
if (PredVPB->getNumSuccessors() == 1)
7903+
connectScalarPreheaderInVPlan(Plan);
7904+
else
7905+
connectCheckBlockInVPlan(Plan, TCCheckBlock);
78787906
return TCCheckBlock;
78797907
}
78807908

@@ -7905,32 +7933,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79057933
EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith(
79067934
VecEpilogueIterationCountCheck, LoopVectorPreHeader);
79077935

7908-
DT->changeImmediateDominator(LoopVectorPreHeader,
7909-
EPI.MainLoopIterationCountCheck);
7910-
79117936
EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith(
79127937
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
79137938

79147939
if (EPI.SCEVSafetyCheck)
79157940
EPI.SCEVSafetyCheck->getTerminator()->replaceUsesOfWith(
79167941
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7917-
if (EPI.MemSafetyCheck)
7942+
if (EPI.MemSafetyCheck) {
79187943
EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
79197944
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7920-
7921-
DT->changeImmediateDominator(
7922-
VecEpilogueIterationCountCheck,
7923-
VecEpilogueIterationCountCheck->getSinglePredecessor());
7945+
}
79247946

79257947
DT->changeImmediateDominator(LoopScalarPreHeader,
79267948
EPI.EpilogueIterationCountCheck);
7927-
if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
7928-
// If there is an epilogue which must run, there's no edge from the
7929-
// middle block to exit blocks and thus no need to update the immediate
7930-
// dominator of the exit blocks.
7931-
DT->changeImmediateDominator(LoopExitBlock,
7932-
EPI.EpilogueIterationCountCheck);
7933-
79347949
// Keep track of bypass blocks, as they feed start values to the induction and
79357950
// reduction phis in the scalar loop preheader.
79367951
if (EPI.SCEVSafetyCheck)
@@ -8033,6 +8048,20 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
80338048
}
80348049
ReplaceInstWithInst(Insert->getTerminator(), &BI);
80358050
LoopBypassBlocks.push_back(Insert);
8051+
8052+
// A new entry block has been created for the epilogue VPlan. Hook it in.
8053+
VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock(Insert);
8054+
VPBasicBlock *OldEntry = Plan.getEntry();
8055+
VPBlockUtils::reassociateBlocks(OldEntry, NewEntry);
8056+
Plan.setEntry(NewEntry);
8057+
for (auto &R : make_early_inc_range(*NewEntry)) {
8058+
auto *VPIR = dyn_cast<VPIRInstruction>(&R);
8059+
if (!VPIR || !isa<PHINode>(VPIR->getInstruction()))
8060+
break;
8061+
VPIR->eraseFromParent();
8062+
}
8063+
8064+
connectScalarPreheaderInVPlan(Plan);
80368065
return Insert;
80378066
}
80388067

@@ -10256,7 +10285,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1025610285
// should be removed once induction resume value creation is done
1025710286
// directly in VPlan.
1025810287
EpilogILV.setTripCount(MainILV.getTripCount());
10259-
for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) {
10288+
for (auto &R : make_early_inc_range(*BestEpiPlan.getEntry())) {
1026010289
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
1026110290
if (!ExpandR)
1026210291
continue;
@@ -10316,8 +10345,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1031610345
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
1031710346
}
1031810347

10319-
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
10320-
"DT not preserved correctly");
1032110348
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
1032210349
DT, true, &ExpandedSCEVs);
1032310350
++LoopsEpilogueVectorized;
@@ -10345,6 +10372,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1034510372
checkMixedPrecision(L, ORE);
1034610373
}
1034710374

10375+
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
10376+
"DT not preserved correctly");
10377+
1034810378
std::optional<MDNode *> RemainderLoopID =
1034910379
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1035010380
LLVMLoopVectorizeFollowupEpilogue});

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -170,9 +170,8 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
170170
}
171171

172172
void VPBlockBase::setPlan(VPlan *ParentPlan) {
173-
assert(
174-
(ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
175-
"Can only set plan on its entry or preheader block.");
173+
assert(ParentPlan->getEntry() == this &&
174+
"Can only set plan on its entry or preheader block.");
176175
Plan = ParentPlan;
177176
}
178177

@@ -463,7 +462,6 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
463462
(getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
464463
"other blocks must be terminated by a branch");
465464
}
466-
467465
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
468466
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
469467
BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
@@ -851,9 +849,6 @@ VPlan::~VPlan() {
851849
Block->dropAllReferences(&DummyValue);
852850

853851
VPBlockBase::deleteCFG(Entry);
854-
855-
Preheader->dropAllReferences(&DummyValue);
856-
delete Preheader;
857852
}
858853
for (VPValue *VPV : VPLiveInsToFree)
859854
delete VPV;
@@ -876,9 +871,10 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
876871
VPIRBasicBlock *Entry =
877872
VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
878873
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
874+
VPBlockUtils::connectBlocks(Entry, VecPreheader);
879875
VPIRBasicBlock *ScalarHeader =
880876
VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
881-
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
877+
auto Plan = std::make_unique<VPlan>(Entry, ScalarHeader);
882878

883879
// Create SCEV and VPValue for the trip count.
884880

@@ -1021,8 +1017,9 @@ void VPlan::execute(VPTransformState *State) {
10211017
BasicBlock *VectorPreHeader = State->CFG.PrevBB;
10221018
State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
10231019

1024-
// Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
1025-
cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
1020+
replaceVPBBWithIRVPBB(
1021+
cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor()),
1022+
VectorPreHeader);
10261023
State->CFG.DTU.applyUpdates(
10271024
{{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
10281025

@@ -1049,8 +1046,10 @@ void VPlan::execute(VPTransformState *State) {
10491046
State->CFG.DTU.applyUpdates(
10501047
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
10511048

1049+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
1050+
Entry);
10521051
// Generate code in the loop pre-header and body.
1053-
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
1052+
for (VPBlockBase *Block : make_range(RPOT.begin(), RPOT.end()))
10541053
Block->execute(State);
10551054

10561055
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
@@ -1101,9 +1100,6 @@ void VPlan::execute(VPTransformState *State) {
11011100
}
11021101

11031102
State->CFG.DTU.flush();
1104-
assert(State->CFG.DTU.getDomTree().verify(
1105-
DominatorTree::VerificationLevel::Fast) &&
1106-
"DT not preserved correctly");
11071103
}
11081104

11091105
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
@@ -1156,12 +1152,10 @@ void VPlan::print(raw_ostream &O) const {
11561152

11571153
printLiveIns(O);
11581154

1159-
if (!getPreheader()->empty()) {
1160-
O << "\n";
1161-
getPreheader()->print(O, "", SlotTracker);
1162-
}
1155+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>>
1156+
RPOT(getEntry());
11631157

1164-
for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
1158+
for (const VPBlockBase *Block : RPOT) {
11651159
O << '\n';
11661160
Block->print(O, "", SlotTracker);
11671161
}
@@ -1192,6 +1186,20 @@ std::string VPlan::getName() const {
11921186
return Out;
11931187
}
11941188

1189+
/// Return the first VPRegionBlock found by a shallow depth-first walk starting
/// at the plan's entry block, or nullptr if the walk encounters no region.
VPRegionBlock *VPlan::getVectorLoopRegion() {
1190+
for (VPBlockBase *B : vp_depth_first_shallow(getEntry()))
1191+
if (auto *R = dyn_cast<VPRegionBlock>(B))
1192+
return R;
1193+
return nullptr;
1194+
}
1195+
1196+
/// Const overload: return the first VPRegionBlock found by a shallow
/// depth-first walk starting at the plan's entry block, or nullptr if the walk
/// encounters no region.
const VPRegionBlock *VPlan::getVectorLoopRegion() const {
1197+
for (const VPBlockBase *B : vp_depth_first_shallow(getEntry()))
1198+
if (auto *R = dyn_cast<VPRegionBlock>(B))
1199+
return R;
1200+
return nullptr;
1201+
}
1202+
11951203
LLVM_DUMP_METHOD
11961204
void VPlan::printDOT(raw_ostream &O) const {
11971205
VPlanPrinter Printer(O, *this);
@@ -1242,7 +1250,6 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12421250

12431251
VPlan *VPlan::duplicate() {
12441252
// Clone blocks.
1245-
VPBasicBlock *NewPreheader = Preheader->clone();
12461253
const auto &[NewEntry, __] = cloneFrom(Entry);
12471254

12481255
BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
@@ -1252,8 +1259,7 @@ VPlan *VPlan::duplicate() {
12521259
return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
12531260
}));
12541261
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
1255-
auto *NewPlan =
1256-
new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1262+
auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
12571263
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
12581264
for (VPValue *OldLiveIn : VPLiveInsToFree) {
12591265
Old2NewVPValues[OldLiveIn] =
@@ -1273,7 +1279,6 @@ VPlan *VPlan::duplicate() {
12731279
// else NewTripCount will be created and inserted into Old2NewVPValues when
12741280
// TripCount is cloned. In any case NewPlan->TripCount is updated below.
12751281

1276-
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
12771282
remapOperands(Entry, NewEntry, Old2NewVPValues);
12781283

12791284
// Initialize remaining fields of cloned VPlan.
@@ -1287,6 +1292,19 @@ VPlan *VPlan::duplicate() {
12871292
return NewPlan;
12881293
}
12891294

1295+
/// Return the scalar preheader block of the plan, or nullptr if none is found.
/// The lookup starts at the single successor of the vector loop region (the
/// middle block). If that block has two successors, the second one is
/// returned; otherwise its single successor is returned when it is a
/// VPBasicBlock.
// NOTE(review): getVectorLoopRegion() and both getSingleSuccessor() results
// are used without a null check; presumably the region exists and the middle
// block always has one or two successors when this is called - confirm.
VPBasicBlock *VPlan::getScalarPreheader() {
1296+
auto *MiddleVPBB =
1297+
cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
1298+
if (MiddleVPBB->getNumSuccessors() == 2) {
1299+
// Order is strict: first is the exit block, second is the scalar preheader.
1300+
return cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
1301+
}
1302+
if (auto *IRVPBB = dyn_cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor()))
1303+
return IRVPBB;
1304+
1305+
return nullptr;
1306+
}
1307+
12901308
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
12911309

12921310
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
@@ -1325,8 +1343,6 @@ void VPlanPrinter::dump() {
13251343
OS << "edge [fontname=Courier, fontsize=30]\n";
13261344
OS << "compound=true\n";
13271345

1328-
dumpBlock(Plan.getPreheader());
1329-
13301346
for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
13311347
dumpBlock(Block);
13321348

@@ -1587,7 +1603,6 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
15871603
assignName(Plan.BackedgeTakenCount);
15881604
for (VPValue *LI : Plan.VPLiveInsToFree)
15891605
assignName(LI);
1590-
assignNames(Plan.getPreheader());
15911606

15921607
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
15931608
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));

0 commit comments

Comments
 (0)