Skip to content

Commit 5f8fabe

Browse files
committed
[VPlan] Remove loop region in optimizeForVFAndUF.
1 parent d3614bc commit 5f8fabe

16 files changed

+359
-426
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 44 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2927,6 +2927,9 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29272927
for (const auto &KV : Plan.getLiveOuts())
29282928
KV.second->fixPhi(Plan, State);
29292929

2930+
if (!isa<VPRegionBlock>(State.Plan->getEntry()->getSingleSuccessor()))
2931+
return;
2932+
29302933
for (Instruction *PI : PredicatedInstructions)
29312934
sinkScalarOperands(&*PI);
29322935

@@ -7537,7 +7540,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
75377540
LLVM_DEBUG(BestVPlan.dump());
75387541

75397542
// Perform the actual loop transformation.
7540-
VPTransformState State(BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan);
7543+
VPTransformState State(BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan,
7544+
Legal->getWidestInductionType());
75417545

75427546
// 0. Generate SCEV-dependent code into the preheader, including TripCount,
75437547
// before making any changes to the CFG.
@@ -7598,14 +7602,15 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
75987602
BestVPlan.execute(&State);
75997603

76007604
// 2.5 Collect reduction resume values.
7601-
auto *ExitVPBB =
7602-
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7603-
if (IsEpilogueVectorization)
7605+
if (IsEpilogueVectorization) {
7606+
auto *ExitVPBB = cast<VPBasicBlock>(
7607+
BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
76047608
for (VPRecipeBase &R : *ExitVPBB) {
76057609
updateAndCollectMergePhiForReductionForEpilogueVectorization(
76067610
dyn_cast<VPInstruction>(&R), State, OrigLoop,
76077611
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
76087612
}
7613+
}
76097614

76107615
// 2.6. Maintain Loop Hints
76117616
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -7616,24 +7621,26 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76167621
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
76177622
LLVMLoopVectorizeFollowupVectorized});
76187623

7619-
VPBasicBlock *HeaderVPBB =
7620-
BestVPlan.getVectorLoopRegion()->getEntryBasicBlock();
7621-
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
7622-
if (VectorizedLoopID)
7623-
L->setLoopID(*VectorizedLoopID);
7624-
else {
7625-
// Keep all loop hints from the original loop on the vector loop (we'll
7626-
// replace the vectorizer-specific hints below).
7627-
if (MDNode *LID = OrigLoop->getLoopID())
7628-
L->setLoopID(LID);
7629-
7630-
LoopVectorizeHints Hints(L, true, *ORE);
7631-
Hints.setAlreadyVectorized();
7624+
if (auto *R =
7625+
dyn_cast<VPRegionBlock>(BestVPlan.getEntry()->getSingleSuccessor())) {
7626+
VPBasicBlock *HeaderVPBB = R->getEntryBasicBlock();
7627+
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
7628+
if (VectorizedLoopID)
7629+
L->setLoopID(*VectorizedLoopID);
7630+
else {
7631+
// Keep all loop hints from the original loop on the vector loop (we'll
7632+
// replace the vectorizer-specific hints below).
7633+
if (MDNode *LID = OrigLoop->getLoopID())
7634+
L->setLoopID(LID);
7635+
7636+
LoopVectorizeHints Hints(L, true, *ORE);
7637+
Hints.setAlreadyVectorized();
7638+
}
7639+
TargetTransformInfo::UnrollingPreferences UP;
7640+
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
7641+
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
7642+
addRuntimeUnrollDisableMetaData(L);
76327643
}
7633-
TargetTransformInfo::UnrollingPreferences UP;
7634-
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
7635-
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
7636-
addRuntimeUnrollDisableMetaData(L);
76377644

76387645
// 3. Fix the vectorized code: take care of header phi's, live-outs,
76397646
// predication, updating analyses.
@@ -7642,15 +7649,20 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76427649
ILV.printDebugTracesAtEnd();
76437650

76447651
// 4. Adjust branch weight of the branch in the middle block.
7645-
auto *MiddleTerm =
7646-
cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7647-
if (MiddleTerm->isConditional() &&
7648-
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7649-
// Assume that `Count % VectorTripCount` is equally distributed.
7650-
unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
7651-
assert(TripCount > 0 && "trip count should not be zero");
7652-
const uint32_t Weights[] = {1, TripCount - 1};
7653-
setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
7652+
if (auto *R =
7653+
dyn_cast<VPRegionBlock>(BestVPlan.getEntry()->getSingleSuccessor())) {
7654+
auto *ExitVPBB = cast<VPBasicBlock>(R->getSingleSuccessor());
7655+
7656+
auto *MiddleTerm =
7657+
cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7658+
if (MiddleTerm->isConditional() &&
7659+
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7660+
// Assume that `Count % VectorTripCount` is equally distributed.
7661+
unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
7662+
assert(TripCount > 0 && "trip count should not be zero");
7663+
const uint32_t Weights[] = {1, TripCount - 1};
7664+
setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
7665+
}
76547666
}
76557667

76567668
return State.ExpandedSCEVs;
@@ -9464,7 +9476,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
94649476
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
94659477
Kind, cast_if_present<BinaryOperator>(FPBinOp));
94669478
DerivedIV->setName("offset.idx");
9467-
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
9479+
assert((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) &&
9480+
"IV didn't need transforming?");
94689481

94699482
State.set(this, DerivedIV, VPLane(0));
94709483
}

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 68 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -224,9 +224,10 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
224224

225225
VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
226226
DominatorTree *DT, IRBuilderBase &Builder,
227-
InnerLoopVectorizer *ILV, VPlan *Plan)
227+
InnerLoopVectorizer *ILV, VPlan *Plan,
228+
Type *CanonicalIVTy)
228229
: VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
229-
LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
230+
LVer(nullptr), TypeAnalysis(CanonicalIVTy) {}
230231

231232
Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) {
232233
if (Def->isLiveIn())
@@ -275,8 +276,8 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
275276
// Place the code for broadcasting invariant variables in the new preheader.
276277
IRBuilder<>::InsertPointGuard Guard(Builder);
277278
if (SafeToHoist) {
278-
BasicBlock *LoopVectorPreHeader = CFG.VPBB2IRBB[cast<VPBasicBlock>(
279-
Plan->getVectorLoopRegion()->getSinglePredecessor())];
279+
BasicBlock *LoopVectorPreHeader =
280+
CFG.VPBB2IRBB[cast<VPBasicBlock>(Plan->getEntry())];
280281
if (LoopVectorPreHeader)
281282
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
282283
}
@@ -417,6 +418,12 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
417418
PrevBB->getParent(), CFG.ExitBB);
418419
LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
419420

421+
connectToPredecessors(NewBB, CFG);
422+
return NewBB;
423+
}
424+
425+
void VPBasicBlock::connectToPredecessors(BasicBlock *NewBB,
426+
VPTransformState::CFGState &CFG) {
420427
// Hook up the new basic block to its predecessors.
421428
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
422429
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
@@ -447,38 +454,14 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
447454
}
448455
CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
449456
}
450-
return NewBB;
451457
}
452-
453458
void VPIRBasicBlock::execute(VPTransformState *State) {
454459
assert(getHierarchicalSuccessors().size() <= 2 &&
455460
"VPIRBasicBlock can have at most two successors at the moment!");
456461
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
457462
executeRecipes(State, getIRBasicBlock());
458-
if (getSingleSuccessor()) {
459-
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
460-
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
461-
Br->setOperand(0, nullptr);
462-
getIRBasicBlock()->getTerminator()->eraseFromParent();
463-
}
464-
465-
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
466-
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
467-
BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
468-
assert(PredBB && "Predecessor basic-block not found building successor.");
469-
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
470463

471-
auto *PredBBTerminator = PredBB->getTerminator();
472-
auto *TermBr = cast<BranchInst>(PredBBTerminator);
473-
// Set each forward successor here when it is created, excluding
474-
// backedges. A backward successor is set when the branch is created.
475-
const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
476-
unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
477-
assert(!TermBr->getSuccessor(idx) &&
478-
"Trying to reset an existing successor block.");
479-
TermBr->setSuccessor(idx, IRBB);
480-
State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}});
481-
}
464+
connectToPredecessors(getIRBasicBlock(), State->CFG);
482465
}
483466

484467
void VPBasicBlock::execute(VPTransformState *State) {
@@ -962,7 +945,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
962945

963946
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
964947
// FIXME: Model VF * UF computation completely in VPlan.
965-
assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
966948
unsigned UF = getUF();
967949
if (VF.getNumUsers()) {
968950
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
@@ -1034,8 +1016,13 @@ void VPlan::execute(VPTransformState *State) {
10341016
// skeleton creation, so we can only create the VPIRBasicBlocks now during
10351017
// VPlan execution rather than earlier during VPlan construction.
10361018
BasicBlock *MiddleBB = State->CFG.ExitBB;
1037-
VPBasicBlock *MiddleVPBB =
1038-
cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
1019+
VPBlockBase *Leaf = nullptr;
1020+
for (VPBlockBase *VPB : vp_depth_first_shallow(getEntry()))
1021+
if (VPB->getNumSuccessors() == 0) {
1022+
Leaf = VPB;
1023+
break;
1024+
}
1025+
VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(Leaf->getSinglePredecessor());
10391026
// Find the VPBB for the scalar preheader, relying on the current structure
10401027
// when creating the middle block and its successors: if there's a single
10411028
// predecessor, it must be the scalar preheader. Otherwise, the second
@@ -1063,53 +1050,59 @@ void VPlan::execute(VPTransformState *State) {
10631050
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
10641051
Block->execute(State);
10651052

1066-
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
1067-
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1068-
1069-
// Fix the latch value of canonical, reduction and first-order recurrences
1070-
// phis in the vector loop.
1071-
VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
1072-
for (VPRecipeBase &R : Header->phis()) {
1073-
// Skip phi-like recipes that generate their backedege values themselves.
1074-
if (isa<VPWidenPHIRecipe>(&R))
1075-
continue;
1076-
1077-
if (isa<VPWidenPointerInductionRecipe>(&R) ||
1078-
isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1079-
PHINode *Phi = nullptr;
1080-
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1081-
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1082-
} else {
1083-
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1084-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1085-
"recipe generating only scalars should have been replaced");
1086-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1087-
Phi = cast<PHINode>(GEP->getPointerOperand());
1088-
}
1089-
1090-
Phi->setIncomingBlock(1, VectorLatchBB);
1053+
if (auto *LoopRegion =
1054+
dyn_cast<VPRegionBlock>(getEntry()->getSingleSuccessor())) {
1055+
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1056+
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1057+
1058+
// Fix the latch value of canonical, reduction and first-order recurrences
1059+
// phis in the vector loop.
1060+
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
1061+
for (VPRecipeBase &R : Header->phis()) {
1062+
// Skip phi-like recipes that generate their backedge values themselves.
1063+
if (isa<VPWidenPHIRecipe>(&R))
1064+
continue;
10911065

1092-
// Move the last step to the end of the latch block. This ensures
1093-
// consistent placement of all induction updates.
1094-
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1095-
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1066+
if (isa<VPWidenPointerInductionRecipe>(&R) ||
1067+
isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1068+
PHINode *Phi = nullptr;
1069+
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1070+
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1071+
} else {
1072+
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1073+
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1074+
"recipe generating only scalars should have been replaced");
1075+
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1076+
Phi = cast<PHINode>(GEP->getPointerOperand());
1077+
}
1078+
1079+
Phi->setIncomingBlock(1, VectorLatchBB);
1080+
1081+
// Move the last step to the end of the latch block. This ensures
1082+
// consistent placement of all induction updates.
1083+
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1084+
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1085+
1086+
// Use the steps for the last part as backedge value for the induction.
1087+
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1088+
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1089+
continue;
1090+
}
10961091

1097-
// Use the steps for the last part as backedge value for the induction.
1098-
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1099-
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1100-
continue;
1092+
// For canonical IV, first-order recurrences and in-order reduction phis,
1093+
// only a single part is generated, which provides the last part from the
1094+
// previous iteration. For non-ordered reductions all UF parts are
1095+
// generated.
1096+
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1097+
bool NeedsScalar =
1098+
isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1099+
(isa<VPReductionPHIRecipe>(PhiR) &&
1100+
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1101+
Value *Phi = State->get(PhiR, NeedsScalar);
1102+
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
1103+
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
11011104
}
1102-
1103-
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1104-
bool NeedsScalar =
1105-
isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1106-
(isa<VPReductionPHIRecipe>(PhiR) &&
1107-
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1108-
Value *Phi = State->get(PhiR, NeedsScalar);
1109-
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
1110-
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
11111105
}
1112-
11131106
State->CFG.DTU.flush();
11141107
assert(State->CFG.DTU.getDomTree().verify(
11151108
DominatorTree::VerificationLevel::Fast) &&

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ class VPLane {
236236
struct VPTransformState {
237237
VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
238238
DominatorTree *DT, IRBuilderBase &Builder,
239-
InnerLoopVectorizer *ILV, VPlan *Plan);
239+
InnerLoopVectorizer *ILV, VPlan *Plan, Type *CanonicalIVTy);
240240

241241
/// The chosen Vectorization Factor of the loop being vectorized.
242242
ElementCount VF;
@@ -3378,6 +3378,8 @@ class VPBasicBlock : public VPBlockBase {
33783378
protected:
33793379
/// Execute the recipes in the IR basic block \p BB.
33803380
void executeRecipes(VPTransformState *State, BasicBlock *BB);
3381+
void connectToPredecessors(BasicBlock *NewBB,
3382+
VPTransformState::CFGState &CFG);
33813383

33823384
private:
33833385
/// Create an IR BasicBlock to hold the output instructions generated by this
@@ -3499,6 +3501,7 @@ class VPRegionBlock : public VPBlockBase {
34993501
assert(!isReplicator() && "should only get pre-header of loop regions");
35003502
return getSinglePredecessor()->getExitingBasicBlock();
35013503
}
3504+
/// Reset this region's Entry pointer to nullptr.
void clearEntry() { Entry = nullptr; }
35023505

35033506
/// An indicator whether this region is to generate multiple replicated
35043507
/// instances of output IR corresponding to its VPBlockBases.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,9 @@ bool VPRecipeBase::mayHaveSideEffects() const {
210210

211211
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
212212
VPValue *ExitValue = getOperand(0);
213-
VPBasicBlock *MiddleVPBB =
214-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
213+
auto *Region = dyn_cast<VPRegionBlock>(Plan.getEntry()->getSingleSuccessor());
214+
VPBasicBlock *MiddleVPBB = dyn_cast_or_null<VPBasicBlock>(
215+
Region ? Region->getSingleSuccessor() : nullptr);
215216
VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
216217
auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
217218
// Values leaving the vector loop reach live out phi's in the exiting block
@@ -2208,7 +2209,9 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
22082209
// Replace the temporary unreachable terminator with a new conditional branch,
22092210
// whose two destinations will be set later when they are created.
22102211
auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
2211-
assert(isa<UnreachableInst>(CurrentTerminator) &&
2212+
assert((isa<UnreachableInst>(CurrentTerminator) ||
2213+
(isa<BranchInst>(CurrentTerminator) &&
2214+
!CurrentTerminator->getOperand(0))) &&
22122215
"Expected to replace unreachable terminator with conditional branch.");
22132216
auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
22142217
CondBr->setSuccessor(0, nullptr);

0 commit comments

Comments
 (0)