@@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
2426
2426
return VectorTripCount;
2427
2427
}
2428
2428
2429
+ static void connectScalarPreheaderInVPlan(VPlan &Plan) {
2430
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
2431
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader();
2432
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
2433
+ VPBlockUtils::disconnectBlocks(Plan.getEntry(), VectorPH);
2434
+ VPBlockUtils::connectBlocks(PredVPB, ScalarPH);
2435
+ VPBlockUtils::connectBlocks(PredVPB, VectorPH);
2436
+ }
2437
+
2438
+ static void connectCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) {
2439
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader();
2440
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
2441
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
2442
+ VPBlockUtils::disconnectBlocks(PredVPB, VectorPH);
2443
+ VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB);
2444
+ VPBlockUtils::connectBlocks(PredVPB, CheckVPIRBB);
2445
+ VPBlockUtils::connectBlocks(CheckVPIRBB, ScalarPH);
2446
+ VPBlockUtils::connectBlocks(CheckVPIRBB, VectorPH);
2447
+ }
2448
+
2429
2449
void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
2430
2450
Value *Count = getTripCount();
2431
2451
// Reuse existing vector loop preheader for TC checks.
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
2511
2531
"TC check is expected to dominate Bypass");
2512
2532
2513
2533
// Update dominator for Bypass & LoopExit (if needed).
2514
- DT->changeImmediateDominator(Bypass, TCCheckBlock);
2515
2534
BranchInst &BI =
2516
2535
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
2517
2536
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
2518
2537
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
2519
2538
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
2520
2539
LoopBypassBlocks.push_back(TCCheckBlock);
2540
+
2541
+ connectScalarPreheaderInVPlan(Plan);
2521
2542
}
2522
2543
2523
2544
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2534,6 +2555,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
2534
2555
"Should already be a bypass block due to iteration count check");
2535
2556
LoopBypassBlocks.push_back(SCEVCheckBlock);
2536
2557
AddedSafetyChecks = true;
2558
+
2559
+ connectCheckBlockInVPlan(Plan, SCEVCheckBlock);
2537
2560
return SCEVCheckBlock;
2538
2561
}
2539
2562
@@ -2570,6 +2593,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
2570
2593
2571
2594
AddedSafetyChecks = true;
2572
2595
2596
+ connectCheckBlockInVPlan(Plan, MemCheckBlock);
2573
2597
return MemCheckBlock;
2574
2598
}
2575
2599
@@ -7649,10 +7673,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7649
7673
7650
7674
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
7651
7675
// before making any changes to the CFG.
7652
- if (!BestVPlan.getPreheader ()->empty()) {
7676
+ if (!BestVPlan.getEntry ()->empty()) {
7653
7677
State.CFG.PrevBB = OrigLoop->getLoopPreheader();
7654
7678
State.Builder.SetInsertPoint(OrigLoop->getLoopPreheader()->getTerminator());
7655
- BestVPlan.getPreheader ()->execute(&State);
7679
+ BestVPlan.getEntry ()->execute(&State);
7656
7680
}
7657
7681
if (!ILV.getTripCount())
7658
7682
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
@@ -7861,8 +7885,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
7861
7885
DT->getNode(Bypass)->getIDom()) &&
7862
7886
"TC check is expected to dominate Bypass");
7863
7887
7864
- // Update dominator for Bypass.
7865
- DT->changeImmediateDominator(Bypass, TCCheckBlock);
7866
7888
LoopBypassBlocks.push_back(TCCheckBlock);
7867
7889
7868
7890
// Save the trip count so we don't have to regenerate it in the
@@ -7877,6 +7899,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
7877
7899
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
7878
7900
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
7879
7901
7902
+ VPBlockBase *VectorPH = Plan.getVectorPreheader();
7903
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
7904
+ if (PredVPB->getNumSuccessors() == 1)
7905
+ connectScalarPreheaderInVPlan(Plan);
7906
+ else
7907
+ connectCheckBlockInVPlan(Plan, TCCheckBlock);
7880
7908
return TCCheckBlock;
7881
7909
}
7882
7910
@@ -7907,32 +7935,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
7907
7935
EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith(
7908
7936
VecEpilogueIterationCountCheck, LoopVectorPreHeader);
7909
7937
7910
- DT->changeImmediateDominator(LoopVectorPreHeader,
7911
- EPI.MainLoopIterationCountCheck);
7912
-
7913
7938
EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith(
7914
7939
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7915
7940
7916
7941
if (EPI.SCEVSafetyCheck)
7917
7942
EPI.SCEVSafetyCheck->getTerminator()->replaceUsesOfWith(
7918
7943
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7919
- if (EPI.MemSafetyCheck)
7944
+ if (EPI.MemSafetyCheck) {
7920
7945
EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
7921
7946
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7922
-
7923
- DT->changeImmediateDominator(
7924
- VecEpilogueIterationCountCheck,
7925
- VecEpilogueIterationCountCheck->getSinglePredecessor());
7947
+ }
7926
7948
7927
7949
DT->changeImmediateDominator(LoopScalarPreHeader,
7928
7950
EPI.EpilogueIterationCountCheck);
7929
- if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
7930
- // If there is an epilogue which must run, there's no edge from the
7931
- // middle block to exit blocks and thus no need to update the immediate
7932
- // dominator of the exit blocks.
7933
- DT->changeImmediateDominator(LoopExitBlock,
7934
- EPI.EpilogueIterationCountCheck);
7935
-
7936
7951
// Keep track of bypass blocks, as they feed start values to the induction and
7937
7952
// reduction phis in the scalar loop preheader.
7938
7953
if (EPI.SCEVSafetyCheck)
@@ -8035,6 +8050,20 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
8035
8050
}
8036
8051
ReplaceInstWithInst(Insert->getTerminator(), &BI);
8037
8052
LoopBypassBlocks.push_back(Insert);
8053
+
8054
+ // A new entry block has been created for the epilogue VPlan. Hook it in.
8055
+ VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock(Insert);
8056
+ VPBasicBlock *OldEntry = Plan.getEntry();
8057
+ VPBlockUtils::reassociateBlocks(OldEntry, NewEntry);
8058
+ Plan.setEntry(NewEntry);
8059
+ for (auto &R : make_early_inc_range(*NewEntry)) {
8060
+ auto *VPIR = dyn_cast<VPIRInstruction>(&R);
8061
+ if (!VPIR || !isa<PHINode>(VPIR->getInstruction()))
8062
+ break;
8063
+ VPIR->eraseFromParent();
8064
+ }
8065
+
8066
+ connectScalarPreheaderInVPlan(Plan);
8038
8067
return Insert;
8039
8068
}
8040
8069
@@ -10270,7 +10299,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10270
10299
// should be removed once induction resume value creation is done
10271
10300
// directly in VPlan.
10272
10301
EpilogILV.setTripCount(MainILV.getTripCount());
10273
- for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader ())) {
10302
+ for (auto &R : make_early_inc_range(*BestEpiPlan.getEntry ())) {
10274
10303
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
10275
10304
if (!ExpandR)
10276
10305
continue;
@@ -10330,8 +10359,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10330
10359
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
10331
10360
}
10332
10361
10333
- assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
10334
- "DT not preserved correctly");
10335
10362
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
10336
10363
DT, true, &ExpandedSCEVs);
10337
10364
++LoopsEpilogueVectorized;
@@ -10359,6 +10386,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10359
10386
checkMixedPrecision(L, ORE);
10360
10387
}
10361
10388
10389
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
10390
+ "DT not preserved correctly");
10391
+
10362
10392
std::optional<MDNode *> RemainderLoopID =
10363
10393
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
10364
10394
LLVMLoopVectorizeFollowupEpilogue});
0 commit comments