Skip to content

Commit 1c6fc7a

Browse files
committed
[VPlan] Replace VPRegionBlock with explicit CFG before execute (NFCI).
1 parent 424c8f9 commit 1c6fc7a

File tree

6 files changed

+165
-109
lines changed

6 files changed

+165
-109
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2409,12 +2409,6 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
24092409
// End if-block.
24102410
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
24112411
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
2412-
assert(
2413-
(Parent || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
2414-
all_of(RepRecipe->operands(),
2415-
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
2416-
"Expected a recipe is either within a region or all of its operands "
2417-
"are defined outside the vectorized region.");
24182412
if (IfPredicateInstr)
24192413
PredicatedInstructions.push_back(Cloned);
24202414
}
@@ -2886,14 +2880,18 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28862880

28872881
// Don't apply optimizations below when no vector region remains, as they all
28882882
// require a vector loop at the moment.
2889-
if (!State.Plan->getVectorLoopRegion())
2883+
VPBasicBlock *HeaderVPBB = find_singleton<VPBasicBlock>(
2884+
vp_depth_first_shallow(State.Plan->getEntry()),
2885+
[&State](VPBlockBase *VPB, bool) {
2886+
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
2887+
return VPBB && VPBB->isHeader(State.VPDT) ? VPBB : nullptr;
2888+
});
2889+
if (!HeaderVPBB)
28902890
return;
28912891

28922892
for (Instruction *PI : PredicatedInstructions)
28932893
sinkScalarOperands(&*PI);
28942894

2895-
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
2896-
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
28972895
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
28982896

28992897
// Remove redundant induction instructions.
@@ -2994,7 +2992,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
29942992
}
29952993

29962994
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
2997-
auto Iter = vp_depth_first_deep(Plan.getEntry());
2995+
auto Iter = vp_depth_first_shallow(Plan.getEntry());
29982996
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
29992997
for (VPRecipeBase &P : VPBB->phis()) {
30002998
VPWidenPHIRecipe *VPPhi = dyn_cast<VPWidenPHIRecipe>(&P);
@@ -7804,14 +7802,19 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78047802
// 2.6. Maintain Loop Hints
78057803
// Keep all loop hints from the original loop on the vector loop (we'll
78067804
// replace the vectorizer-specific hints below).
7807-
if (auto *LoopRegion = BestVPlan.getVectorLoopRegion()) {
7805+
auto BlockRange = vp_depth_first_shallow(BestVPlan.getEntry());
7806+
auto HeaderVPBBIter = find_if(BlockRange, [&State](VPBlockBase *VPB) {
7807+
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
7808+
return VPBB && VPBB->isHeader(State.VPDT) ? VPBB : nullptr;
7809+
});
7810+
if (HeaderVPBBIter != BlockRange.end()) {
7811+
VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(*HeaderVPBBIter);
78087812
MDNode *OrigLoopID = OrigLoop->getLoopID();
78097813

78107814
std::optional<MDNode *> VectorizedLoopID =
78117815
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
78127816
LLVMLoopVectorizeFollowupVectorized});
78137817

7814-
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
78157818
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
78167819
if (VectorizedLoopID) {
78177820
L->setLoopID(*VectorizedLoopID);
@@ -7837,7 +7840,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78377840
ILV.printDebugTracesAtEnd();
78387841

78397842
// 4. Adjust branch weight of the branch in the middle block.
7840-
if (BestVPlan.getVectorLoopRegion()) {
7843+
if (HeaderVPBBIter != BlockRange.end()) {
78417844
auto *MiddleVPBB = BestVPlan.getMiddleBlock();
78427845
auto *MiddleTerm =
78437846
cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator());

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 109 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,11 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
208208
return Parent->getEnclosingBlockWithPredecessors();
209209
}
210210

211+
/// Returns true if this block has exactly two predecessors and, according to
/// \p VPDT, dominates the second one (i.e. the edge from the second
/// predecessor is a back-edge). Other code in this change relies on that
/// ordering: predecessor 0 is used as the preheader and predecessor 1 as the
/// latch.
bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
212+
return getPredecessors().size() == 2 &&
213+
VPDT.dominates(this, getPredecessors()[1]);
214+
}
215+
211216
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
212217
iterator It = begin();
213218
while (It != end() && It->isPhi())
@@ -352,8 +357,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
352357
}
353358

354359
BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
355-
VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
356-
return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
360+
return VPBB2IRBB[cast<VPBasicBlock>(R->getParent()->getPredecessors()[0])];
357361
}
358362

359363
void VPTransformState::addNewMetadata(Instruction *To,
@@ -436,14 +440,18 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
436440
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
437441
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
438442
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
439-
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
443+
BasicBlock *PredBB = CFG.VPBB2IRBB.lookup(PredVPBB);
444+
if (!PredBB)
445+
continue;
440446

441447
assert(PredBB && "Predecessor basic-block not found building successor.");
442448
auto *PredBBTerminator = PredBB->getTerminator();
443449
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
444450

445451
auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
446452
if (isa<UnreachableInst>(PredBBTerminator)) {
453+
if (PredVPSuccessors.size() == 2)
454+
continue;
447455
assert(PredVPSuccessors.size() == 1 &&
448456
"Predecessor ending w/o branch must have single successor.");
449457
DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -499,11 +507,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
499507
bool Replica = bool(State->Lane);
500508
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
501509

510+
if (isHeader(State->VPDT)) {
511+
// Create and register the new vector loop.
512+
Loop *PrevParentLoop = State->CurrentParentLoop;
513+
State->CurrentParentLoop = State->LI->AllocateLoop();
514+
515+
// Insert the new loop into the loop nest and register the new basic blocks
516+
// before calling any utilities such as SCEV that require valid LoopInfo.
517+
if (PrevParentLoop)
518+
PrevParentLoop->addChildLoop(State->CurrentParentLoop);
519+
else
520+
State->LI->addTopLevelLoop(State->CurrentParentLoop);
521+
}
522+
502523
auto IsReplicateRegion = [](VPBlockBase *BB) {
503524
auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
504-
return R && R->isReplicator();
525+
assert((!R || R->isReplicator()) &&
526+
"only replicate region blocks should remain");
527+
return R;
505528
};
506-
507529
// 1. Create an IR basic block.
508530
if ((Replica && this == getParent()->getEntry()) ||
509531
IsReplicateRegion(getSingleHierarchicalPredecessor())) {
@@ -537,6 +559,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
537559

538560
// 2. Fill the IR basic block with IR instructions.
539561
executeRecipes(State, NewBB);
562+
563+
// If this block is a latch, update CurrentParentLoop.
564+
if (any_of(getSuccessors(), [State, this](VPBlockBase *Succ) {
565+
auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
566+
return VPBB && VPBB->isHeader(State->VPDT) &&
567+
State->VPDT.dominates(Succ, this);
568+
}))
569+
State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
540570
}
541571

542572
VPBasicBlock *VPBasicBlock::clone() {
@@ -747,35 +777,13 @@ VPRegionBlock *VPRegionBlock::clone() {
747777
}
748778

749779
void VPRegionBlock::execute(VPTransformState *State) {
750-
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
751-
RPOT(Entry);
752-
753-
if (!isReplicator()) {
754-
// Create and register the new vector loop.
755-
Loop *PrevParentLoop = State->CurrentParentLoop;
756-
State->CurrentParentLoop = State->LI->AllocateLoop();
757-
758-
// Insert the new loop into the loop nest and register the new basic blocks
759-
// before calling any utilities such as SCEV that require valid LoopInfo.
760-
if (PrevParentLoop)
761-
PrevParentLoop->addChildLoop(State->CurrentParentLoop);
762-
else
763-
State->LI->addTopLevelLoop(State->CurrentParentLoop);
764-
765-
// Visit the VPBlocks connected to "this", starting from it.
766-
for (VPBlockBase *Block : RPOT) {
767-
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
768-
Block->execute(State);
769-
}
770-
771-
State->CurrentParentLoop = PrevParentLoop;
772-
return;
773-
}
774-
780+
assert(isReplicator() &&
781+
"Loop regions should have been lowered to plain CFG");
775782
assert(!State->Lane && "Replicating a Region with non-null instance.");
776-
777-
// Enter replicating mode.
778783
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
784+
785+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
786+
Entry);
779787
State->Lane = VPLane(0);
780788
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
781789
++Lane) {
@@ -850,6 +858,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
850858
}
851859
#endif
852860

861+
// Dissolves this region into its surrounding CFG: the region is unlinked from
// its single predecessor and single successor, its blocks are detached from
// the region, and the predecessor/successor are wired directly to the
// region's entry and exiting blocks.
// NOTE(review): presumably callers guarantee exactly one predecessor and one
// successor; the results of getSinglePredecessor()/getSingleSuccessor() are
// used without a null check -- confirm.
void VPRegionBlock::removeRegion() {
862+
// Both the entry and the exiting block must be VPBasicBlocks (cast<> asserts).
auto *Header = cast<VPBasicBlock>(getEntry());
863+
VPBlockBase *Preheader = getSinglePredecessor();
864+
auto *Exiting = cast<VPBasicBlock>(getExiting());
865+
866+
VPBlockBase *Middle = getSingleSuccessor();
867+
// Unlink the region itself from the blocks before and after it.
VPBlockUtils::disconnectBlocks(Preheader, this);
868+
VPBlockUtils::disconnectBlocks(this, Middle);
869+
870+
// Clear the parent of every block reached by a shallow walk from the entry.
for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
871+
VPB->setParent(nullptr);
872+
873+
// Reconnect: predecessor -> entry block, exiting block -> successor.
VPBlockUtils::connectBlocks(Preheader, Header);
874+
VPBlockUtils::connectBlocks(Exiting, Middle);
875+
}
876+
853877
VPlan::VPlan(Loop *L) {
854878
setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
855879
ScalarHeader = createVPIRBasicBlock(L->getHeader());
@@ -959,57 +983,57 @@ void VPlan::execute(VPTransformState *State) {
959983
for (VPBlockBase *Block : RPOT)
960984
Block->execute(State);
961985

962-
State->CFG.DTU.flush();
963-
964-
auto *LoopRegion = getVectorLoopRegion();
965-
if (!LoopRegion)
966-
return;
967-
968-
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
969-
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
970-
971986
// Fix the latch value of canonical, reduction and first-order recurrences
972987
// phis in the vector loop.
973-
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
974-
for (VPRecipeBase &R : Header->phis()) {
975-
// Skip phi-like recipes that generate their backedege values themselves.
976-
if (isa<VPWidenPHIRecipe>(&R))
988+
for (VPBasicBlock *Header :
989+
VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
990+
if (!Header->isHeader(State->VPDT))
977991
continue;
992+
for (VPRecipeBase &R : Header->phis()) {
993+
if (isa<VPWidenPHIRecipe>(&R))
994+
continue;
978995

979-
if (isa<VPWidenInductionRecipe>(&R)) {
980-
PHINode *Phi = nullptr;
981-
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
982-
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
983-
} else {
984-
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
985-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
986-
"recipe generating only scalars should have been replaced");
987-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
988-
Phi = cast<PHINode>(GEP->getPointerOperand());
996+
auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
997+
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
998+
999+
if (isa<VPWidenInductionRecipe>(&R)) {
1000+
PHINode *Phi = nullptr;
1001+
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1002+
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1003+
} else {
1004+
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1005+
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1006+
"recipe generating only scalars should have been replaced");
1007+
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1008+
Phi = cast<PHINode>(GEP->getPointerOperand());
1009+
}
1010+
1011+
Phi->setIncomingBlock(1, VectorLatchBB);
1012+
1013+
// Move the last step to the end of the latch block. This ensures
1014+
// consistent placement of all induction updates.
1015+
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1016+
Inc->moveBefore(
1017+
std::prev(VectorLatchBB->getTerminator()->getIterator()));
1018+
1019+
// Use the steps for the last part as backedge value for the induction.
1020+
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1021+
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1022+
continue;
9891023
}
9901024

991-
Phi->setIncomingBlock(1, VectorLatchBB);
992-
993-
// Move the last step to the end of the latch block. This ensures
994-
// consistent placement of all induction updates.
995-
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
996-
Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
997-
998-
// Use the steps for the last part as backedge value for the induction.
999-
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1000-
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1001-
continue;
1025+
auto *PhiR = cast<VPSingleDefRecipe>(&R);
1026+
// VPInstructions currently model scalar Phis only.
1027+
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1028+
(isa<VPReductionPHIRecipe>(PhiR) &&
1029+
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1030+
1031+
Value *Phi = State->get(PhiR, NeedsScalar);
1032+
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1033+
// not.
1034+
Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1035+
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
10021036
}
1003-
1004-
auto *PhiR = cast<VPSingleDefRecipe>(&R);
1005-
// VPInstructions currently model scalar Phis only.
1006-
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1007-
(isa<VPReductionPHIRecipe>(PhiR) &&
1008-
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1009-
Value *Phi = State->get(PhiR, NeedsScalar);
1010-
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
1011-
Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1012-
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
10131037
}
10141038
}
10151039

@@ -1361,16 +1385,16 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
13611385

13621386
#endif
13631387

1364-
/// Returns true if there is a vector loop region and \p VPV is defined in a
1365-
/// loop region.
1366-
static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1367-
const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1368-
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1369-
DefR->getParent()->getEnclosingLoopRegion());
1370-
}
1371-
13721388
bool VPValue::isDefinedOutsideLoopRegions() const {
1373-
return !isDefinedInsideLoopRegions(this);
1389+
auto *DefR = getDefiningRecipe();
1390+
if (!DefR)
1391+
return true;
1392+
1393+
const VPBasicBlock *DefVPBB = DefR->getParent();
1394+
auto *Plan = DefVPBB->getPlan();
1395+
if (Plan->getVectorLoopRegion())
1396+
return !DefR->getParent()->getEnclosingLoopRegion();
1397+
return DefVPBB == Plan->getEntry();
13741398
}
13751399
void VPValue::replaceAllUsesWith(VPValue *New) {
13761400
replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class SCEV;
5757
class Type;
5858
class VPBasicBlock;
5959
class VPBuilder;
60+
class VPDominatorTree;
6061
class VPRegionBlock;
6162
class VPlan;
6263
class VPLane;
@@ -3253,6 +3254,8 @@ class VPBasicBlock : public VPBlockBase {
32533254
/// the cloned recipes.
32543255
VPBasicBlock *clone() override;
32553256

3257+
/// Returns true if this block has exactly two predecessors and, according to
/// \p VPDT, dominates the second of them.
bool isHeader(const VPDominatorTree &VPDT) const;
3258+
32563259
protected:
32573260
/// Execute the recipes in the IR basic block \p BB.
32583261
void executeRecipes(VPTransformState *State, BasicBlock *BB);
@@ -3399,6 +3402,10 @@ class VPRegionBlock : public VPBlockBase {
33993402
/// Clone all blocks in the single-entry single-exit region of the block and
34003403
/// their recipes without updating the operands of the cloned recipes.
34013404
VPRegionBlock *clone() override;
3405+
3406+
/// Remove the current region from its VPlan, connecting its predecessor to
3407+
/// its entry and exiting block to its successor.
3408+
void removeRegion();
34023409
};
34033410

34043411
/// VPlan models a candidate for vectorization, encoding various decisions take

0 commit comments

Comments
 (0)