Skip to content

Commit 48f79e0

Browse files
committed
[VPlan] Hook IR blocks into VPlan during skeleton creation (NFC)
As a first step towards modeling the full skeleton in VPlan, wrap the IR blocks created during legacy skeleton creation in VPIRBasicBlocks and hook them into the VPlan. This means the skeleton CFG is represented in VPlan just before execution, which allows parts of skeleton creation to be moved gradually into recipes in the VPBBs. Note that this also allows retiring some manual DT updates, as these will be handled automatically during VPlan execution.
1 parent 5bd1af5 commit 48f79e0

File tree

11 files changed

+165
-134
lines changed

11 files changed

+165
-134
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 53 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
24262426
return VectorTripCount;
24272427
}
24282428

2429+
/// Rewires \p Plan so that the single predecessor of the vector preheader has
/// edges to both the scalar preheader and the vector preheader.
///
/// The edge from the plan's entry to the vector preheader is removed first;
/// the predecessor is then connected to the scalar preheader and afterwards
/// to the vector preheader again.
static void connectScalarPreheaderInVPlan(VPlan &Plan) {
2430+
VPBlockBase *VectorPH = Plan.getVectorPreheader();
2431+
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
2432+
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
2433+
// NOTE(review): the edge being removed is named via Plan.getEntry(), while
// the edges added below start at PredVPB. This presumes the vector
// preheader's single predecessor is the plan's entry block here -- confirm.
VPBlockUtils::disconnectBlocks(Plan.getEntry(), VectorPH);
2434+
// The scalar preheader is connected before the vector preheader; presumably
// this fixes the successor order of PredVPB (scalar first, vector second).
VPBlockUtils::connectBlocks(PredVPB, ScalarPH);
2435+
VPBlockUtils::connectBlocks(PredVPB, VectorPH);
2436+
}
2437+
2438+
/// Wraps \p CheckIRBB in a VPIRBasicBlock and inserts it into \p Plan between
/// the vector preheader and the vector preheader's single predecessor.
///
/// After the call, the predecessor has an edge to the new check block, and
/// the check block has edges to the scalar preheader and to the vector
/// preheader (connected in that order).
static void connectCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) {
2439+
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
2440+
VPBlockBase *VectorPH = Plan.getVectorPreheader();
2441+
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
2442+
// Detach the vector preheader from its predecessor so the check block can be
// placed on that edge.
VPBlockUtils::disconnectBlocks(PredVPB, VectorPH);
2443+
VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB);
2444+
VPBlockUtils::connectBlocks(PredVPB, CheckVPIRBB);
2445+
// Scalar preheader is connected first, then the vector preheader; presumably
// this determines the successor order of the check block.
VPBlockUtils::connectBlocks(CheckVPIRBB, ScalarPH);
2446+
VPBlockUtils::connectBlocks(CheckVPIRBB, VectorPH);
2447+
}
2448+
24292449
void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
24302450
Value *Count = getTripCount();
24312451
// Reuse existing vector loop preheader for TC checks.
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
25112531
"TC check is expected to dominate Bypass");
25122532

25132533
// Update dominator for Bypass & LoopExit (if needed).
2514-
DT->changeImmediateDominator(Bypass, TCCheckBlock);
25152534
BranchInst &BI =
25162535
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
25172536
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
25182537
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
25192538
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
25202539
LoopBypassBlocks.push_back(TCCheckBlock);
2540+
2541+
connectScalarPreheaderInVPlan(Plan);
25212542
}
25222543

25232544
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2534,6 +2555,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
25342555
"Should already be a bypass block due to iteration count check");
25352556
LoopBypassBlocks.push_back(SCEVCheckBlock);
25362557
AddedSafetyChecks = true;
2558+
2559+
connectCheckBlockInVPlan(Plan, SCEVCheckBlock);
25372560
return SCEVCheckBlock;
25382561
}
25392562

@@ -2570,6 +2593,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
25702593

25712594
AddedSafetyChecks = true;
25722595

2596+
connectCheckBlockInVPlan(Plan, MemCheckBlock);
25732597
return MemCheckBlock;
25742598
}
25752599

@@ -7649,10 +7673,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76497673

76507674
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
76517675
// before making any changes to the CFG.
7652-
if (!BestVPlan.getPreheader()->empty()) {
7676+
if (!BestVPlan.getEntry()->empty()) {
76537677
State.CFG.PrevBB = OrigLoop->getLoopPreheader();
76547678
State.Builder.SetInsertPoint(OrigLoop->getLoopPreheader()->getTerminator());
7655-
BestVPlan.getPreheader()->execute(&State);
7679+
BestVPlan.getEntry()->execute(&State);
76567680
}
76577681
if (!ILV.getTripCount())
76587682
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
@@ -7861,8 +7885,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
78617885
DT->getNode(Bypass)->getIDom()) &&
78627886
"TC check is expected to dominate Bypass");
78637887

7864-
// Update dominator for Bypass.
7865-
DT->changeImmediateDominator(Bypass, TCCheckBlock);
78667888
LoopBypassBlocks.push_back(TCCheckBlock);
78677889

78687890
// Save the trip count so we don't have to regenerate it in the
@@ -7877,6 +7899,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
78777899
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
78787900
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
78797901

7902+
VPBlockBase *VectorPH = Plan.getVectorPreheader();
7903+
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
7904+
if (PredVPB->getNumSuccessors() == 1)
7905+
connectScalarPreheaderInVPlan(Plan);
7906+
else
7907+
connectCheckBlockInVPlan(Plan, TCCheckBlock);
78807908
return TCCheckBlock;
78817909
}
78827910

@@ -7907,32 +7935,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79077935
EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith(
79087936
VecEpilogueIterationCountCheck, LoopVectorPreHeader);
79097937

7910-
DT->changeImmediateDominator(LoopVectorPreHeader,
7911-
EPI.MainLoopIterationCountCheck);
7912-
79137938
EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith(
79147939
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
79157940

79167941
if (EPI.SCEVSafetyCheck)
79177942
EPI.SCEVSafetyCheck->getTerminator()->replaceUsesOfWith(
79187943
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7919-
if (EPI.MemSafetyCheck)
7944+
if (EPI.MemSafetyCheck) {
79207945
EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
79217946
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7922-
7923-
DT->changeImmediateDominator(
7924-
VecEpilogueIterationCountCheck,
7925-
VecEpilogueIterationCountCheck->getSinglePredecessor());
7947+
}
79267948

79277949
DT->changeImmediateDominator(LoopScalarPreHeader,
79287950
EPI.EpilogueIterationCountCheck);
7929-
if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
7930-
// If there is an epilogue which must run, there's no edge from the
7931-
// middle block to exit blocks and thus no need to update the immediate
7932-
// dominator of the exit blocks.
7933-
DT->changeImmediateDominator(LoopExitBlock,
7934-
EPI.EpilogueIterationCountCheck);
7935-
79367951
// Keep track of bypass blocks, as they feed start values to the induction and
79377952
// reduction phis in the scalar loop preheader.
79387953
if (EPI.SCEVSafetyCheck)
@@ -8035,6 +8050,20 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
80358050
}
80368051
ReplaceInstWithInst(Insert->getTerminator(), &BI);
80378052
LoopBypassBlocks.push_back(Insert);
8053+
8054+
// A new entry block has been created for the epilogue VPlan. Hook it in.
8055+
VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock(Insert);
8056+
VPBasicBlock *OldEntry = Plan.getEntry();
8057+
VPBlockUtils::reassociateBlocks(OldEntry, NewEntry);
8058+
Plan.setEntry(NewEntry);
8059+
for (auto &R : make_early_inc_range(*NewEntry)) {
8060+
auto *VPIR = dyn_cast<VPIRInstruction>(&R);
8061+
if (!VPIR || !isa<PHINode>(VPIR->getInstruction()))
8062+
break;
8063+
VPIR->eraseFromParent();
8064+
}
8065+
8066+
connectScalarPreheaderInVPlan(Plan);
80388067
return Insert;
80398068
}
80408069

@@ -10270,7 +10299,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1027010299
// should be removed once induction resume value creation is done
1027110300
// directly in VPlan.
1027210301
EpilogILV.setTripCount(MainILV.getTripCount());
10273-
for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) {
10302+
for (auto &R : make_early_inc_range(*BestEpiPlan.getEntry())) {
1027410303
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
1027510304
if (!ExpandR)
1027610305
continue;
@@ -10330,8 +10359,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1033010359
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
1033110360
}
1033210361

10333-
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
10334-
"DT not preserved correctly");
1033510362
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
1033610363
DT, true, &ExpandedSCEVs);
1033710364
++LoopsEpilogueVectorized;
@@ -10359,6 +10386,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1035910386
checkMixedPrecision(L, ORE);
1036010387
}
1036110388

10389+
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
10390+
"DT not preserved correctly");
10391+
1036210392
std::optional<MDNode *> RemainderLoopID =
1036310393
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1036410394
LLVMLoopVectorizeFollowupEpilogue});

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 43 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -173,9 +173,8 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
173173
}
174174

175175
/// Records \p ParentPlan as the plan this block belongs to.
///
/// This hunk shows both forms of the precondition: the removed ('-') assert
/// accepted the plan's entry or its preheader, while the added ('+') assert
/// only accepts the plan's entry block.
void VPBlockBase::setPlan(VPlan *ParentPlan) {
176-
assert(
177-
(ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
178-
"Can only set plan on its entry or preheader block.");
176+
// NOTE(review): the condition below no longer checks getPreheader(), but the
// assertion message still mentions "preheader".
assert(ParentPlan->getEntry() == this &&
177+
"Can only set plan on its entry or preheader block.");
179178
Plan = ParentPlan;
180179
}
181180

@@ -455,13 +454,11 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
455454
"VPIRBasicBlock can have at most two successors at the moment!");
456455
State->Builder.SetInsertPoint(IRBB->getTerminator());
457456
executeRecipes(State, IRBB);
458-
if (getSingleSuccessor()) {
459-
assert(isa<UnreachableInst>(IRBB->getTerminator()));
457+
if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
460458
auto *Br = State->Builder.CreateBr(IRBB);
461459
Br->setOperand(0, nullptr);
462460
IRBB->getTerminator()->eraseFromParent();
463461
}
464-
465462
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
466463
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
467464
BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
@@ -474,7 +471,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
474471
// backedges. A backward successor is set when the branch is created.
475472
const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
476473
unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
477-
assert(!TermBr->getSuccessor(idx) &&
474+
assert((!TermBr->getSuccessor(idx) || TermBr->getSuccessor(idx) == IRBB) &&
478475
"Trying to reset an existing successor block.");
479476
TermBr->setSuccessor(idx, IRBB);
480477
State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}});
@@ -853,9 +850,6 @@ VPlan::~VPlan() {
853850
Block->dropAllReferences(&DummyValue);
854851

855852
VPBlockBase::deleteCFG(Entry);
856-
857-
Preheader->dropAllReferences(&DummyValue);
858-
delete Preheader;
859853
}
860854
for (VPValue *VPV : VPLiveInsToFree)
861855
delete VPV;
@@ -878,7 +872,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
878872
VPIRBasicBlock *Entry =
879873
VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
880874
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
881-
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
875+
VPBlockUtils::connectBlocks(Entry, VecPreheader);
876+
auto Plan = std::make_unique<VPlan>(Entry);
882877

883878
// Create SCEV and VPValue for the trip count.
884879

@@ -1020,8 +1015,9 @@ void VPlan::execute(VPTransformState *State) {
10201015
BasicBlock *VectorPreHeader = State->CFG.PrevBB;
10211016
State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
10221017

1023-
// Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
1024-
cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
1018+
replaceVPBBWithIRVPBB(
1019+
cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor()),
1020+
VectorPreHeader);
10251021
State->CFG.DTU.applyUpdates(
10261022
{{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
10271023

@@ -1055,8 +1051,10 @@ void VPlan::execute(VPTransformState *State) {
10551051
MiddleBB->getTerminator()->eraseFromParent();
10561052
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
10571053

1054+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
1055+
Entry);
10581056
// Generate code in the loop pre-header and body.
1059-
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
1057+
for (VPBlockBase *Block : make_range(RPOT.begin(), RPOT.end()))
10601058
Block->execute(State);
10611059

10621060
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
@@ -1107,9 +1105,6 @@ void VPlan::execute(VPTransformState *State) {
11071105
}
11081106

11091107
State->CFG.DTU.flush();
1110-
assert(State->CFG.DTU.getDomTree().verify(
1111-
DominatorTree::VerificationLevel::Fast) &&
1112-
"DT not preserved correctly");
11131108
}
11141109

11151110
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
@@ -1162,12 +1157,10 @@ void VPlan::print(raw_ostream &O) const {
11621157

11631158
printLiveIns(O);
11641159

1165-
if (!getPreheader()->empty()) {
1166-
O << "\n";
1167-
getPreheader()->print(O, "", SlotTracker);
1168-
}
1160+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>>
1161+
RPOT(getEntry());
11691162

1170-
for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
1163+
for (const VPBlockBase *Block : RPOT) {
11711164
O << '\n';
11721165
Block->print(O, "", SlotTracker);
11731166
}
@@ -1204,6 +1197,20 @@ std::string VPlan::getName() const {
12041197
return Out;
12051198
}
12061199

1200+
/// Returns the first VPRegionBlock encountered when iterating over
/// vp_depth_first_shallow(getEntry()), or nullptr if that traversal contains
/// no region block.
VPRegionBlock *VPlan::getVectorLoopRegion() {
1201+
for (VPBlockBase *B : vp_depth_first_shallow(getEntry()))
1202+
if (auto *R = dyn_cast<VPRegionBlock>(B))
1203+
return R;
1204+
// No region block was found in the traversal.
return nullptr;
1205+
}
1206+
1207+
/// Const overload: returns the first VPRegionBlock encountered when iterating
/// over vp_depth_first_shallow(getEntry()), or nullptr if that traversal
/// contains no region block.
const VPRegionBlock *VPlan::getVectorLoopRegion() const {
1208+
for (const VPBlockBase *B : vp_depth_first_shallow(getEntry()))
1209+
if (auto *R = dyn_cast<VPRegionBlock>(B))
1210+
return R;
1211+
// No region block was found in the traversal.
return nullptr;
1212+
}
1213+
12071214
LLVM_DUMP_METHOD
12081215
void VPlan::printDOT(raw_ostream &O) const {
12091216
VPlanPrinter Printer(O, *this);
@@ -1259,11 +1266,10 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12591266

12601267
VPlan *VPlan::duplicate() {
12611268
// Clone blocks.
1262-
VPBasicBlock *NewPreheader = Preheader->clone();
12631269
const auto &[NewEntry, __] = cloneFrom(Entry);
12641270

12651271
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
1266-
auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry));
1272+
auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry));
12671273
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
12681274
for (VPValue *OldLiveIn : VPLiveInsToFree) {
12691275
Old2NewVPValues[OldLiveIn] =
@@ -1283,7 +1289,6 @@ VPlan *VPlan::duplicate() {
12831289
// else NewTripCount will be created and inserted into Old2NewVPValues when
12841290
// TripCount is cloned. In any case NewPlan->TripCount is updated below.
12851291

1286-
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
12871292
remapOperands(Entry, NewEntry, Old2NewVPValues);
12881293

12891294
// Clone live-outs.
@@ -1301,6 +1306,19 @@ VPlan *VPlan::duplicate() {
13011306
return NewPlan;
13021307
}
13031308

1309+
/// Returns the scalar preheader block, located via the single successor of
/// the vector loop region (the "middle" block).
///
/// If the middle block has two successors, the second one is returned.
/// Otherwise its single successor is returned when it is a VPBasicBlock, and
/// nullptr is returned when it is not.
VPBasicBlock *VPlan::getScalarPreheader() {
1310+
// NOTE(review): getVectorLoopRegion() returns nullptr when no region block is
// found; its result is dereferenced here without a check -- confirm callers
// only use this while a vector loop region exists.
auto *MiddleVPBB =
1311+
cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
1312+
if (MiddleVPBB->getNumSuccessors() == 2) {
1313+
// Order is strict: first is the exit block, second is the scalar preheader.
1314+
return cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
1315+
}
1316+
// NOTE(review): if the middle block has no single successor,
// getSingleSuccessor() presumably yields null, which plain dyn_cast is not
// meant to receive -- confirm whether a null-tolerant cast is intended.
if (auto *IRVPBB = dyn_cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor()))
1317+
return IRVPBB;
1318+
1319+
return nullptr;
1320+
}
1321+
13041322
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
13051323

13061324
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
@@ -1339,8 +1357,6 @@ void VPlanPrinter::dump() {
13391357
OS << "edge [fontname=Courier, fontsize=30]\n";
13401358
OS << "compound=true\n";
13411359

1342-
dumpBlock(Plan.getPreheader());
1343-
13441360
for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
13451361
dumpBlock(Block);
13461362

@@ -1601,7 +1617,6 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
16011617
assignName(Plan.BackedgeTakenCount);
16021618
for (VPValue *LI : Plan.VPLiveInsToFree)
16031619
assignName(LI);
1604-
assignNames(Plan.getPreheader());
16051620

16061621
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
16071622
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));

0 commit comments

Comments
 (0)