@@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
2426
2426
return VectorTripCount;
2427
2427
}
2428
2428
2429
+ static void connectScalarPreheaderInVPlan (VPlan &Plan) {
2430
+ VPBlockBase *VectorPH = Plan.getVectorPreheader ();
2431
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader ();
2432
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor ();
2433
+ VPBlockUtils::disconnectBlocks (Plan.getEntry (), VectorPH);
2434
+ VPBlockUtils::connectBlocks (PredVPB, ScalarPH);
2435
+ VPBlockUtils::connectBlocks (PredVPB, VectorPH);
2436
+ }
2437
+
2438
+ static void connectCheckBlockInVPlan (VPlan &Plan, BasicBlock *CheckIRBB) {
2439
+ VPBlockBase *ScalarPH = Plan.getScalarPreheader ();
2440
+ VPBlockBase *VectorPH = Plan.getVectorPreheader ();
2441
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor ();
2442
+ VPBlockUtils::disconnectBlocks (PredVPB, VectorPH);
2443
+ VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock (CheckIRBB);
2444
+ VPBlockUtils::connectBlocks (PredVPB, CheckVPIRBB);
2445
+ VPBlockUtils::connectBlocks (CheckVPIRBB, ScalarPH);
2446
+ VPBlockUtils::connectBlocks (CheckVPIRBB, VectorPH);
2447
+ }
2448
+
2429
2449
void InnerLoopVectorizer::emitIterationCountCheck (BasicBlock *Bypass) {
2430
2450
Value *Count = getTripCount ();
2431
2451
// Reuse existing vector loop preheader for TC checks.
@@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
2511
2531
" TC check is expected to dominate Bypass" );
2512
2532
2513
2533
// Update dominator for Bypass & LoopExit (if needed).
2514
- DT->changeImmediateDominator (Bypass, TCCheckBlock);
2515
2534
BranchInst &BI =
2516
2535
*BranchInst::Create (Bypass, LoopVectorPreHeader, CheckMinIters);
2517
2536
if (hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ()))
2518
2537
setBranchWeights (BI, MinItersBypassWeights, /* IsExpected=*/ false );
2519
2538
ReplaceInstWithInst (TCCheckBlock->getTerminator (), &BI);
2520
2539
LoopBypassBlocks.push_back (TCCheckBlock);
2540
+
2541
+ connectScalarPreheaderInVPlan (Plan);
2521
2542
}
2522
2543
2523
2544
BasicBlock *InnerLoopVectorizer::emitSCEVChecks (BasicBlock *Bypass) {
@@ -2534,6 +2555,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
2534
2555
" Should already be a bypass block due to iteration count check" );
2535
2556
LoopBypassBlocks.push_back (SCEVCheckBlock);
2536
2557
AddedSafetyChecks = true ;
2558
+
2559
+ connectCheckBlockInVPlan (Plan, SCEVCheckBlock);
2537
2560
return SCEVCheckBlock;
2538
2561
}
2539
2562
@@ -2570,6 +2593,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
2570
2593
2571
2594
AddedSafetyChecks = true ;
2572
2595
2596
+ connectCheckBlockInVPlan (Plan, MemCheckBlock);
2573
2597
return MemCheckBlock;
2574
2598
}
2575
2599
@@ -7648,10 +7672,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7648
7672
7649
7673
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
7650
7674
// before making any changes to the CFG.
7651
- if (!BestVPlan.getPreheader ()->empty ()) {
7675
+ if (!BestVPlan.getEntry ()->empty ()) {
7652
7676
State.CFG .PrevBB = OrigLoop->getLoopPreheader ();
7653
7677
State.Builder .SetInsertPoint (OrigLoop->getLoopPreheader ()->getTerminator ());
7654
- BestVPlan.getPreheader ()->execute (&State);
7678
+ BestVPlan.getEntry ()->execute (&State);
7655
7679
}
7656
7680
if (!ILV.getTripCount ())
7657
7681
ILV.setTripCount (State.get (BestVPlan.getTripCount (), VPLane (0 )));
@@ -7859,8 +7883,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
7859
7883
DT->getNode (Bypass)->getIDom ()) &&
7860
7884
" TC check is expected to dominate Bypass" );
7861
7885
7862
- // Update dominator for Bypass.
7863
- DT->changeImmediateDominator (Bypass, TCCheckBlock);
7864
7886
LoopBypassBlocks.push_back (TCCheckBlock);
7865
7887
7866
7888
// Save the trip count so we don't have to regenerate it in the
@@ -7875,6 +7897,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
7875
7897
setBranchWeights (BI, MinItersBypassWeights, /* IsExpected=*/ false );
7876
7898
ReplaceInstWithInst (TCCheckBlock->getTerminator (), &BI);
7877
7899
7900
+ VPBlockBase *VectorPH = Plan.getVectorPreheader ();
7901
+ VPBlockBase *PredVPB = VectorPH->getSinglePredecessor ();
7902
+ if (PredVPB->getNumSuccessors () == 1 )
7903
+ connectScalarPreheaderInVPlan (Plan);
7904
+ else
7905
+ connectCheckBlockInVPlan (Plan, TCCheckBlock);
7878
7906
return TCCheckBlock;
7879
7907
}
7880
7908
@@ -7905,32 +7933,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
7905
7933
EPI.MainLoopIterationCountCheck ->getTerminator ()->replaceUsesOfWith (
7906
7934
VecEpilogueIterationCountCheck, LoopVectorPreHeader);
7907
7935
7908
- DT->changeImmediateDominator (LoopVectorPreHeader,
7909
- EPI.MainLoopIterationCountCheck );
7910
-
7911
7936
EPI.EpilogueIterationCountCheck ->getTerminator ()->replaceUsesOfWith (
7912
7937
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7913
7938
7914
7939
if (EPI.SCEVSafetyCheck )
7915
7940
EPI.SCEVSafetyCheck ->getTerminator ()->replaceUsesOfWith (
7916
7941
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7917
- if (EPI.MemSafetyCheck )
7942
+ if (EPI.MemSafetyCheck ) {
7918
7943
EPI.MemSafetyCheck ->getTerminator ()->replaceUsesOfWith (
7919
7944
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
7920
-
7921
- DT->changeImmediateDominator (
7922
- VecEpilogueIterationCountCheck,
7923
- VecEpilogueIterationCountCheck->getSinglePredecessor ());
7945
+ }
7924
7946
7925
7947
DT->changeImmediateDominator (LoopScalarPreHeader,
7926
7948
EPI.EpilogueIterationCountCheck );
7927
- if (!Cost->requiresScalarEpilogue (EPI.EpilogueVF .isVector ()))
7928
- // If there is an epilogue which must run, there's no edge from the
7929
- // middle block to exit blocks and thus no need to update the immediate
7930
- // dominator of the exit blocks.
7931
- DT->changeImmediateDominator (LoopExitBlock,
7932
- EPI.EpilogueIterationCountCheck );
7933
-
7934
7949
// Keep track of bypass blocks, as they feed start values to the induction and
7935
7950
// reduction phis in the scalar loop preheader.
7936
7951
if (EPI.SCEVSafetyCheck )
@@ -8033,6 +8048,20 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
8033
8048
}
8034
8049
ReplaceInstWithInst (Insert->getTerminator (), &BI);
8035
8050
LoopBypassBlocks.push_back (Insert);
8051
+
8052
+ // A new entry block has been created for the epilogue VPlan. Hook it in.
8053
+ VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock (Insert);
8054
+ VPBasicBlock *OldEntry = Plan.getEntry ();
8055
+ VPBlockUtils::reassociateBlocks (OldEntry, NewEntry);
8056
+ Plan.setEntry (NewEntry);
8057
+ for (auto &R : make_early_inc_range (*NewEntry)) {
8058
+ auto *VPIR = dyn_cast<VPIRInstruction>(&R);
8059
+ if (!VPIR || !isa<PHINode>(VPIR->getInstruction ()))
8060
+ break ;
8061
+ VPIR->eraseFromParent ();
8062
+ }
8063
+
8064
+ connectScalarPreheaderInVPlan (Plan);
8036
8065
return Insert;
8037
8066
}
8038
8067
@@ -10256,7 +10285,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10256
10285
// should be removed once induction resume value creation is done
10257
10286
// directly in VPlan.
10258
10287
EpilogILV.setTripCount (MainILV.getTripCount ());
10259
- for (auto &R : make_early_inc_range (*BestEpiPlan.getPreheader ())) {
10288
+ for (auto &R : make_early_inc_range (*BestEpiPlan.getEntry ())) {
10260
10289
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
10261
10290
if (!ExpandR)
10262
10291
continue ;
@@ -10316,8 +10345,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10316
10345
cast<VPHeaderPHIRecipe>(&R)->setStartValue (StartVal);
10317
10346
}
10318
10347
10319
- assert (DT->verify (DominatorTree::VerificationLevel::Fast) &&
10320
- " DT not preserved correctly" );
10321
10348
LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
10322
10349
DT, true , &ExpandedSCEVs);
10323
10350
++LoopsEpilogueVectorized;
@@ -10345,6 +10372,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10345
10372
checkMixedPrecision (L, ORE);
10346
10373
}
10347
10374
10375
+ assert (DT->verify (DominatorTree::VerificationLevel::Fast) &&
10376
+ " DT not preserved correctly" );
10377
+
10348
10378
std::optional<MDNode *> RemainderLoopID =
10349
10379
makeFollowupLoopID (OrigLoopID, {LLVMLoopVectorizeFollowupAll,
10350
10380
LLVMLoopVectorizeFollowupEpilogue});
0 commit comments