Skip to content

Commit 6c8f41d

Browse files
authored
[VPlan] Hook IR blocks into VPlan during skeleton creation (NFC) (#114292)
As a first step towards modeling the full skeleton in VPlan, start by wrapping the IR blocks created during legacy skeleton creation in VPIRBasicBlocks and hooking them into the VPlan. This means the skeleton CFG is represented in VPlan just before execution, which allows parts of skeleton creation to be moved gradually into recipes in the VPBBs. Note that this also allows retiring some manual DT updates, as these will be handled automatically during VPlan execution. PR: #114292
1 parent fda80a4 commit 6c8f41d

37 files changed

+452
-184
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2466,6 +2466,25 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
24662466
return VectorTripCount;
24672467
}
24682468

2469+
/// Introduces a new VPIRBasicBlock for \p CheckIRBB to \p Plan between the
2470+
/// vector preheader and its predecessor, also connecting the new block to the
2471+
/// scalar preheader.
2472+
static void introduceCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) {
2473+
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
2474+
VPBlockBase *VectorPH = Plan.getVectorPreheader();
2475+
VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
2476+
if (PreVectorPH->getNumSuccessors() != 1) {
2477+
assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors");
2478+
assert(PreVectorPH->getSuccessors()[0] == ScalarPH &&
2479+
"Unexpected successor");
2480+
VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB);
2481+
VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckVPIRBB);
2482+
PreVectorPH = CheckVPIRBB;
2483+
}
2484+
VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH);
2485+
PreVectorPH->swapSuccessors();
2486+
}
2487+
24692488
void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
24702489
Value *Count = getTripCount();
24712490
// Reuse existing vector loop preheader for TC checks.
@@ -2540,14 +2559,15 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
25402559
DT->getNode(Bypass)->getIDom()) &&
25412560
"TC check is expected to dominate Bypass");
25422561

2543-
// Update dominator for Bypass & LoopExit (if needed).
2544-
DT->changeImmediateDominator(Bypass, TCCheckBlock);
25452562
BranchInst &BI =
25462563
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
25472564
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
25482565
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
25492566
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
25502567
LoopBypassBlocks.push_back(TCCheckBlock);
2568+
2569+
// TODO: Wrap LoopVectorPreHeader in VPIRBasicBlock here.
2570+
introduceCheckBlockInVPlan(Plan, TCCheckBlock);
25512571
}
25522572

25532573
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2564,6 +2584,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
25642584
"Should already be a bypass block due to iteration count check");
25652585
LoopBypassBlocks.push_back(SCEVCheckBlock);
25662586
AddedSafetyChecks = true;
2587+
2588+
introduceCheckBlockInVPlan(Plan, SCEVCheckBlock);
25672589
return SCEVCheckBlock;
25682590
}
25692591

@@ -2600,6 +2622,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
26002622

26012623
AddedSafetyChecks = true;
26022624

2625+
introduceCheckBlockInVPlan(Plan, MemCheckBlock);
26032626
return MemCheckBlock;
26042627
}
26052628

@@ -7980,8 +8003,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
79808003
DT->getNode(Bypass)->getIDom()) &&
79818004
"TC check is expected to dominate Bypass");
79828005

7983-
// Update dominator for Bypass.
7984-
DT->changeImmediateDominator(Bypass, TCCheckBlock);
79858006
LoopBypassBlocks.push_back(TCCheckBlock);
79868007

79878008
// Save the trip count so we don't have to regenerate it in the
@@ -7996,6 +8017,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
79968017
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
79978018
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
79988019

8020+
introduceCheckBlockInVPlan(Plan, TCCheckBlock);
79998021
return TCCheckBlock;
80008022
}
80018023

@@ -8027,9 +8049,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80278049
EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith(
80288050
VecEpilogueIterationCountCheck, LoopVectorPreHeader);
80298051

8030-
DT->changeImmediateDominator(LoopVectorPreHeader,
8031-
EPI.MainLoopIterationCountCheck);
8032-
80338052
EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith(
80348053
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
80358054

@@ -8040,19 +8059,8 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80408059
EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
80418060
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
80428061

8043-
DT->changeImmediateDominator(
8044-
VecEpilogueIterationCountCheck,
8045-
VecEpilogueIterationCountCheck->getSinglePredecessor());
8046-
80478062
DT->changeImmediateDominator(LoopScalarPreHeader,
80488063
EPI.EpilogueIterationCountCheck);
8049-
if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
8050-
// If there is an epilogue which must run, there's no edge from the
8051-
// middle block to exit blocks and thus no need to update the immediate
8052-
// dominator of the exit blocks.
8053-
DT->changeImmediateDominator(OrigLoop->getUniqueLatchExitBlock(),
8054-
EPI.EpilogueIterationCountCheck);
8055-
80568064
// Keep track of bypass blocks, as they feed start values to the induction and
80578065
// reduction phis in the scalar loop preheader.
80588066
if (EPI.SCEVSafetyCheck)
@@ -8143,6 +8151,16 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
81438151
}
81448152
ReplaceInstWithInst(Insert->getTerminator(), &BI);
81458153
LoopBypassBlocks.push_back(Insert);
8154+
8155+
// A new entry block has been created for the epilogue VPlan. Hook it in, as
8156+
// otherwise we would try to modify the entry to the main vector loop.
8157+
VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock(Insert);
8158+
VPBasicBlock *OldEntry = Plan.getEntry();
8159+
VPBlockUtils::reassociateBlocks(OldEntry, NewEntry);
8160+
Plan.setEntry(NewEntry);
8161+
delete OldEntry;
8162+
8163+
introduceCheckBlockInVPlan(Plan, Insert);
81468164
return Insert;
81478165
}
81488166

@@ -10495,8 +10513,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1049510513
EpilogILV.setTripCount(MainILV.getTripCount());
1049610514
preparePlanForEpilogueVectorLoop(BestEpiPlan, L, ExpandedSCEVs, EPI);
1049710515

10498-
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
10499-
"DT not preserved correctly");
1050010516
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
1050110517
DT, true, &ExpandedSCEVs);
1050210518
++LoopsEpilogueVectorized;
@@ -10524,6 +10540,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1052410540
checkMixedPrecision(L, ORE);
1052510541
}
1052610542

10543+
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
10544+
"DT not preserved correctly");
10545+
1052710546
std::optional<MDNode *> RemainderLoopID =
1052810547
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1052910548
LLVMLoopVectorizeFollowupEpilogue});

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 49 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -170,9 +170,7 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
170170
}
171171

172172
void VPBlockBase::setPlan(VPlan *ParentPlan) {
173-
assert(
174-
(ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
175-
"Can only set plan on its entry or preheader block.");
173+
assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
176174
Plan = ParentPlan;
177175
}
178176

@@ -823,16 +821,25 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
823821
}
824822
#endif
825823

824+
VPlan::VPlan(VPBasicBlock *OriginalPreheader, VPValue *TC,
825+
VPBasicBlock *EntryVectorPreHeader, VPIRBasicBlock *ScalarHeader)
826+
: VPlan(OriginalPreheader, TC, ScalarHeader) {
827+
VPBlockUtils::connectBlocks(OriginalPreheader, EntryVectorPreHeader);
828+
}
829+
830+
VPlan::VPlan(VPBasicBlock *OriginalPreheader,
831+
VPBasicBlock *EntryVectorPreHeader, VPIRBasicBlock *ScalarHeader)
832+
: VPlan(OriginalPreheader, ScalarHeader) {
833+
VPBlockUtils::connectBlocks(OriginalPreheader, EntryVectorPreHeader);
834+
}
835+
826836
VPlan::~VPlan() {
827837
if (Entry) {
828838
VPValue DummyValue;
829839
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
830840
Block->dropAllReferences(&DummyValue);
831841

832842
VPBlockBase::deleteCFG(Entry);
833-
834-
Preheader->dropAllReferences(&DummyValue);
835-
delete Preheader;
836843
}
837844
for (VPValue *VPV : VPLiveInsToFree)
838845
delete VPV;
@@ -855,9 +862,16 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
855862
VPIRBasicBlock *Entry =
856863
VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
857864
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
865+
// Connect entry only to vector preheader initially. Entry will also be
866+
// connected to the scalar preheader later, during skeleton creation when
867+
// runtime guards are added as needed. Note that when executing the VPlan for
868+
// an epilogue vector loop, the original entry block here will be replaced by
869+
// a new VPIRBasicBlock wrapping the entry to the epilogue vector loop after
870+
// generating code for the main vector loop.
871+
VPBlockUtils::connectBlocks(Entry, VecPreheader);
858872
VPIRBasicBlock *ScalarHeader =
859873
VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
860-
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
874+
auto Plan = std::make_unique<VPlan>(Entry, ScalarHeader);
861875

862876
// Create SCEV and VPValue for the trip count.
863877
// We use the symbolic max backedge-taken-count, which works also when
@@ -981,15 +995,21 @@ void VPlan::execute(VPTransformState *State) {
981995
State->CFG.DTU.applyUpdates(
982996
{{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
983997

984-
// Replace regular VPBB's for the middle and scalar preheader blocks with
985-
// VPIRBasicBlocks wrapping their IR blocks. The IR blocks are created during
986-
// skeleton creation, so we can only create the VPIRBasicBlocks now during
987-
// VPlan execution rather than earlier during VPlan construction.
998+
// Replace regular VPBB's for the vector preheader, middle and scalar
999+
// preheader blocks with VPIRBasicBlocks wrapping their IR blocks. The IR
1000+
// blocks are created during skeleton creation, so we can only create the
1001+
// VPIRBasicBlocks now during VPlan execution rather than earlier during VPlan
1002+
// construction.
9881003
BasicBlock *MiddleBB = State->CFG.ExitBB;
989-
VPBasicBlock *MiddleVPBB = getMiddleBlock();
9901004
BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor();
1005+
replaceVPBBWithIRVPBB(getVectorPreheader(), VectorPreHeader);
1006+
replaceVPBBWithIRVPBB(getMiddleBlock(), MiddleBB);
9911007
replaceVPBBWithIRVPBB(getScalarPreheader(), ScalarPh);
992-
replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB);
1008+
1009+
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
1010+
<< ", UF=" << getUF() << '\n');
1011+
setName("Final VPlan");
1012+
LLVM_DEBUG(dump());
9931013

9941014
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
9951015
<< ", UF=" << getUF() << '\n');
@@ -1062,9 +1082,6 @@ void VPlan::execute(VPTransformState *State) {
10621082
}
10631083

10641084
State->CFG.DTU.flush();
1065-
assert(State->CFG.DTU.getDomTree().verify(
1066-
DominatorTree::VerificationLevel::Fast) &&
1067-
"DT not preserved correctly");
10681085
}
10691086

10701087
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
@@ -1117,11 +1134,6 @@ void VPlan::print(raw_ostream &O) const {
11171134

11181135
printLiveIns(O);
11191136

1120-
if (!getPreheader()->empty()) {
1121-
O << "\n";
1122-
getPreheader()->print(O, "", SlotTracker);
1123-
}
1124-
11251137
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>>
11261138
RPOT(getEntry());
11271139
for (const VPBlockBase *Block : RPOT) {
@@ -1155,6 +1167,21 @@ std::string VPlan::getName() const {
11551167
return Out;
11561168
}
11571169

1170+
VPRegionBlock *VPlan::getVectorLoopRegion() {
1171+
// TODO: Cache if possible.
1172+
for (VPBlockBase *B : vp_depth_first_shallow(getEntry()))
1173+
if (auto *R = dyn_cast<VPRegionBlock>(B))
1174+
return R;
1175+
return nullptr;
1176+
}
1177+
1178+
const VPRegionBlock *VPlan::getVectorLoopRegion() const {
1179+
for (const VPBlockBase *B : vp_depth_first_shallow(getEntry()))
1180+
if (auto *R = dyn_cast<VPRegionBlock>(B))
1181+
return R;
1182+
return nullptr;
1183+
}
1184+
11581185
LLVM_DUMP_METHOD
11591186
void VPlan::printDOT(raw_ostream &O) const {
11601187
VPlanPrinter Printer(O, *this);
@@ -1205,7 +1232,6 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12051232

12061233
VPlan *VPlan::duplicate() {
12071234
// Clone blocks.
1208-
VPBasicBlock *NewPreheader = Preheader->clone();
12091235
const auto &[NewEntry, __] = cloneFrom(Entry);
12101236

12111237
BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
@@ -1215,8 +1241,7 @@ VPlan *VPlan::duplicate() {
12151241
return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
12161242
}));
12171243
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
1218-
auto *NewPlan =
1219-
new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1244+
auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
12201245
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
12211246
for (VPValue *OldLiveIn : VPLiveInsToFree) {
12221247
Old2NewVPValues[OldLiveIn] =
@@ -1236,7 +1261,6 @@ VPlan *VPlan::duplicate() {
12361261
// else NewTripCount will be created and inserted into Old2NewVPValues when
12371262
// TripCount is cloned. In any case NewPlan->TripCount is updated below.
12381263

1239-
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
12401264
remapOperands(Entry, NewEntry, Old2NewVPValues);
12411265

12421266
// Initialize remaining fields of cloned VPlan.
@@ -1288,8 +1312,6 @@ void VPlanPrinter::dump() {
12881312
OS << "edge [fontname=Courier, fontsize=30]\n";
12891313
OS << "compound=true\n";
12901314

1291-
dumpBlock(Plan.getPreheader());
1292-
12931315
for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
12941316
dumpBlock(Block);
12951317

@@ -1550,7 +1572,6 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
15501572
assignName(Plan.BackedgeTakenCount);
15511573
for (VPValue *LI : Plan.VPLiveInsToFree)
15521574
assignName(LI);
1553-
assignNames(Plan.getPreheader());
15541575

15551576
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
15561577
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));

0 commit comments

Comments
 (0)