Skip to content

Commit f81a43c

Browse files
committed
[VPlan] Introduce scalar loop header in plan, remove VPLiveOut.
Update VPlan to include the scalar loop header. This allows retiring VPLiveOut, as the remaining live-outs can now be handled by adding operands to the wrapped phis in the scalar loop header. Note that the current version only includes the scalar loop header; it does not include any other loop blocks, and it does not wrap the header in a region block. Both of these can be added either in this PR or in follow-ups as needed.
1 parent 09ba83b commit f81a43c

File tree

8 files changed

+54
-132
lines changed

8 files changed

+54
-132
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2931,10 +2931,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29312931
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
29322932
}
29332933

2934-
// Fix live-out phis not already fixed earlier.
2935-
for (const auto &KV : Plan.getLiveOuts())
2936-
KV.second->fixPhi(Plan, State);
2937-
29382934
for (Instruction *PI : PredicatedInstructions)
29392935
sinkScalarOperands(&*PI);
29402936

@@ -8865,7 +8861,14 @@ static void addLiveOutsForFirstOrderRecurrences(
88658861
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
88668862
"scalar.recur.init");
88678863
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
8868-
Plan.addLiveOut(FORPhi, ResumePhiRecipe);
8864+
for (VPRecipeBase &R :
8865+
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
8866+
auto *IRI = cast<VPIRInstruction>(&R);
8867+
if (&IRI->getInstruction() == FORPhi) {
8868+
IRI->addOperand(ResumePhiRecipe);
8869+
break;
8870+
}
8871+
}
88698872

88708873
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
88718874
// Extract the penultimate value of the recurrence and use it as operand for

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
456456
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
457457
executeRecipes(State, getIRBasicBlock());
458458
if (getSingleSuccessor()) {
459-
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
460-
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
461-
Br->setOperand(0, nullptr);
462-
getIRBasicBlock()->getTerminator()->eraseFromParent();
459+
auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
460+
if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
461+
getIRBasicBlock()->getSingleSuccessor()) {
462+
cast<BranchInst>(getIRBasicBlock()->getTerminator())
463+
->setOperand(0, nullptr);
464+
} else {
465+
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
466+
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
467+
Br->setOperand(0, nullptr);
468+
getIRBasicBlock()->getTerminator()->eraseFromParent();
469+
}
463470
}
464471

465472
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -843,10 +850,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
843850
#endif
844851

845852
VPlan::~VPlan() {
846-
for (auto &KV : LiveOuts)
847-
delete KV.second;
848-
LiveOuts.clear();
849-
850853
if (Entry) {
851854
VPValue DummyValue;
852855
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -909,6 +912,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
909912
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
910913

911914
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
915+
VPBasicBlock *ScalarHeader =
916+
VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
917+
VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
912918
if (!RequiresScalarEpilogueCheck) {
913919
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
914920
return Plan;
@@ -1058,6 +1064,8 @@ void VPlan::execute(VPTransformState *State) {
10581064
BrInst->insertBefore(MiddleBB->getTerminator());
10591065
MiddleBB->getTerminator()->eraseFromParent();
10601066
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
1067+
State->CFG.DTU.applyUpdates(
1068+
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
10611069

10621070
// Generate code in the loop pre-header and body.
10631071
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1176,12 +1184,6 @@ void VPlan::print(raw_ostream &O) const {
11761184
Block->print(O, "", SlotTracker);
11771185
}
11781186

1179-
if (!LiveOuts.empty())
1180-
O << "\n";
1181-
for (const auto &KV : LiveOuts) {
1182-
KV.second->print(O, SlotTracker);
1183-
}
1184-
11851187
O << "}\n";
11861188
}
11871189

@@ -1218,11 +1220,6 @@ LLVM_DUMP_METHOD
12181220
void VPlan::dump() const { print(dbgs()); }
12191221
#endif
12201222

1221-
void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1222-
assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1223-
LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1224-
}
1225-
12261223
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12271224
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
12281225
// Update the operands of all cloned recipes starting at NewEntry. This
@@ -1290,10 +1287,6 @@ VPlan *VPlan::duplicate() {
12901287
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
12911288
remapOperands(Entry, NewEntry, Old2NewVPValues);
12921289

1293-
// Clone live-outs.
1294-
for (const auto &[_, LO] : LiveOuts)
1295-
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1296-
12971290
// Initialize remaining fields of cloned VPlan.
12981291
NewPlan->VFs = VFs;
12991292
NewPlan->UFs = UFs;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -655,48 +655,6 @@ class VPBlockBase {
655655
virtual VPBlockBase *clone() = 0;
656656
};
657657

658-
/// A value that is used outside the VPlan. The operand of the user needs to be
659-
/// added to the associated phi node. The incoming block from VPlan is
660-
/// determined by where the VPValue is defined: if it is defined by a recipe
661-
/// outside a region, its parent block is used, otherwise the middle block is
662-
/// used.
663-
class VPLiveOut : public VPUser {
664-
PHINode *Phi;
665-
666-
public:
667-
VPLiveOut(PHINode *Phi, VPValue *Op)
668-
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
669-
670-
static inline bool classof(const VPUser *U) {
671-
return U->getVPUserID() == VPUser::VPUserID::LiveOut;
672-
}
673-
674-
/// Fix the wrapped phi node. This means adding an incoming value to exit
675-
/// block phi's from the vector loop via middle block (values from scalar loop
676-
/// already reach these phi's), and updating the value to scalar header phi's
677-
/// from the scalar preheader.
678-
void fixPhi(VPlan &Plan, VPTransformState &State);
679-
680-
/// Returns true if the VPLiveOut uses scalars of operand \p Op.
681-
bool usesScalars(const VPValue *Op) const override {
682-
assert(is_contained(operands(), Op) &&
683-
"Op must be an operand of the recipe");
684-
return true;
685-
}
686-
687-
PHINode *getPhi() const { return Phi; }
688-
689-
/// Live-outs are marked as only using the first part during the transition
690-
/// to unrolling directly on VPlan.
691-
/// TODO: Remove after unroller transition.
692-
bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }
693-
694-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
695-
/// Print the VPLiveOut to \p O.
696-
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
697-
#endif
698-
};
699-
700658
/// Struct to hold various analysis needed for cost computations.
701659
struct VPCostContext {
702660
const TargetTransformInfo &TTI;
@@ -3497,11 +3455,6 @@ class VPlan {
34973455
/// definitions are VPValues that hold a pointer to their underlying IR.
34983456
SmallVector<VPValue *, 16> VPLiveInsToFree;
34993457

3500-
/// Values used outside the plan. It contains live-outs that need fixing. Any
3501-
/// live-out that is fixed outside VPlan needs to be removed. The remaining
3502-
/// live-outs are fixed via VPLiveOut::fixPhi.
3503-
MapVector<PHINode *, VPLiveOut *> LiveOuts;
3504-
35053458
/// Mapping from SCEVs to the VPValues representing their expansions.
35063459
/// NOTE: This mapping is temporary and will be removed once all users have
35073460
/// been modeled in VPlan directly.
@@ -3681,12 +3634,6 @@ class VPlan {
36813634
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
36823635
}
36833636

3684-
void addLiveOut(PHINode *PN, VPValue *V);
3685-
3686-
const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
3687-
return LiveOuts;
3688-
}
3689-
36903637
VPValue *getSCEVExpansion(const SCEV *S) const {
36913638
return SCEVToExpansion.lookup(S);
36923639
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -202,35 +202,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
202202
}
203203
}
204204

205-
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
206-
VPValue *ExitValue = getOperand(0);
207-
VPBasicBlock *MiddleVPBB =
208-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
209-
VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
210-
auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
211-
// Values leaving the vector loop reach live out phi's in the exiting block
212-
// via middle block.
213-
auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
214-
? MiddleVPBB
215-
: ExitingVPBB;
216-
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
217-
Value *V = State.get(ExitValue, VPLane(0));
218-
if (Phi->getBasicBlockIndex(PredBB) != -1)
219-
Phi->setIncomingValueForBlock(PredBB, V);
220-
else
221-
Phi->addIncoming(V, PredBB);
222-
}
223-
224-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
225-
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
226-
O << "Live-out ";
227-
getPhi()->printAsOperand(O);
228-
O << " = ";
229-
getOperand(0)->printAsOperand(O, SlotTracker);
230-
O << "\n";
231-
}
232-
#endif
233-
234205
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
235206
assert(!Parent && "Recipe already in some VPBasicBlock");
236207
assert(InsertPos->getParent() &&
@@ -855,7 +826,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
855826
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
856827
Value *V = State.get(ExitValue, VPLane(Lane));
857828
auto *Phi = cast<PHINode>(&I);
858-
Phi->addIncoming(V, PredBB);
829+
if (Phi->getBasicBlockIndex(PredBB) == -1)
830+
Phi->addIncoming(V, PredBB);
831+
else
832+
Phi->setIncomingValueForBlock(PredBB, V);
859833
}
860834

861835
// Advance the insert point after the wrapped IR instruction. This allows

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
378378
// Don't fold the exit block of the Plan into its single predecessor for
379379
// now.
380380
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
381-
if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
381+
if (!VPBB->getParent())
382382
continue;
383383
auto *PredVPBB =
384384
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
264264
return;
265265

266266
if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
267+
VPValue *Op0, *Op1;
268+
if (match(VPI, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(Op0),
269+
m_VPValue(Op1)))) {
270+
addUniformForAllParts(VPI);
271+
return;
272+
}
273+
267274
if (vputils::onlyFirstPartUsed(VPI)) {
268275
addUniformForAllParts(VPI);
269276
return;
@@ -449,11 +456,5 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
449456
Part++;
450457
}
451458

452-
// Remap the operand of live-outs to the last part.
453-
for (const auto &[_, LO] : Plan.getLiveOuts()) {
454-
VPValue *In = Unroller.getValueForPart(LO->getOperand(0), UF - 1);
455-
LO->setOperand(0, In);
456-
}
457-
458459
VPlanTransforms::removeDeadRecipes(Plan);
459460
}

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -244,14 +244,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
244244
return false;
245245
}
246246

247-
VPBlockBase *MiddleBB =
248-
IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor();
249-
if (IRBB != IRBB->getPlan()->getPreheader() &&
250-
IRBB->getSinglePredecessor() != MiddleBB) {
251-
errs() << "VPIRBasicBlock can only be used as pre-header or a successor of "
252-
"middle-block at the moment!\n";
253-
return false;
254-
}
255247
return true;
256248
}
257249

@@ -416,12 +408,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
416408
return false;
417409
}
418410

419-
for (const auto &KV : Plan.getLiveOuts())
420-
if (KV.second->getNumOperands() != 1) {
421-
errs() << "live outs must have a single operand\n";
422-
return false;
423-
}
424-
425411
return true;
426412
}
427413

llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,6 +1077,17 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
10771077
; CHECK-NEXT: No successors
10781078
; CHECK-EMPTY:
10791079
; CHECK-NEXT: scalar.ph
1080+
; CHECK-NEXT: Successor(s): ir-bb<loop>
1081+
; CHECK-EMPTY:
1082+
; CHECK-NEXT: ir-bb<loop>:
1083+
; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ]
1084+
; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, -1
1085+
; CHECK-NEXT: IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
1086+
; CHECK-NEXT: IR %l = load i32, ptr %gep.src, align 16
1087+
; CHECK-NEXT: IR %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1
1088+
; CHECK-NEXT: IR %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
1089+
; CHECK-NEXT: IR store i32 %l, ptr %gep.dst, align 16
1090+
; CHECK-NEXT: IR %ec = icmp eq i32 %iv.next, 0
10801091
; CHECK-NEXT: No successors
10811092
; CHECK-NEXT: }
10821093
;
@@ -1156,6 +1167,13 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
11561167
; CHECK-NEXT: No successors
11571168
; CHECK-EMPTY:
11581169
; CHECK-NEXT: scalar.ph:
1170+
; CHECK-NEXT: Successor(s): ir-bb<loop.header>
1171+
; CHECK-EMPTY:
1172+
; CHECK-NEXT: ir-bb<loop.header>:
1173+
; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
1174+
; CHECK-NEXT: IR %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1
1175+
; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv.next, align 1
1176+
; CHECK-NEXT: IR %c.1 = icmp eq i8 %l, 0
11591177
; CHECK-NEXT: No successors
11601178
; CHECK-NEXT: }
11611179
;

0 commit comments

Comments
 (0)