Skip to content

Commit be27cb6

Browse files
committed
[VPlan] Introduce scalar loop header in plan, remove VPLiveOut.
Update VPlan to include the scalar loop header. This allows retiring VPLiveOut, as the remaining live-outs can now be handled by adding operands to the wrapped phis in the scalar loop header. Note that the current version includes only the scalar loop header: it models no other blocks of the scalar loop and does not wrap the header in a region block. Either of these can be added in this PR or in follow-ups as needed.
1 parent 725eb6b commit be27cb6

File tree

8 files changed

+54
-132
lines changed

8 files changed

+54
-132
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2931,10 +2931,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29312931
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
29322932
}
29332933

2934-
// Fix live-out phis not already fixed earlier.
2935-
for (const auto &KV : Plan.getLiveOuts())
2936-
KV.second->fixPhi(Plan, State);
2937-
29382934
for (Instruction *PI : PredicatedInstructions)
29392935
sinkScalarOperands(&*PI);
29402936

@@ -8852,7 +8848,14 @@ static void addLiveOutsForFirstOrderRecurrences(
88528848
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
88538849
"scalar.recur.init");
88548850
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
8855-
Plan.addLiveOut(FORPhi, ResumePhiRecipe);
8851+
for (VPRecipeBase &R :
8852+
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
8853+
auto *IRI = cast<VPIRInstruction>(&R);
8854+
if (&IRI->getInstruction() == FORPhi) {
8855+
IRI->addOperand(ResumePhiRecipe);
8856+
break;
8857+
}
8858+
}
88568859

88578860
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
88588861
// Extract the penultimate value of the recurrence and use it as operand for

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
456456
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
457457
executeRecipes(State, getIRBasicBlock());
458458
if (getSingleSuccessor()) {
459-
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
460-
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
461-
Br->setOperand(0, nullptr);
462-
getIRBasicBlock()->getTerminator()->eraseFromParent();
459+
auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
460+
if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
461+
getIRBasicBlock()->getSingleSuccessor()) {
462+
cast<BranchInst>(getIRBasicBlock()->getTerminator())
463+
->setOperand(0, nullptr);
464+
} else {
465+
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
466+
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
467+
Br->setOperand(0, nullptr);
468+
getIRBasicBlock()->getTerminator()->eraseFromParent();
469+
}
463470
}
464471

465472
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -843,10 +850,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
843850
#endif
844851

845852
VPlan::~VPlan() {
846-
for (auto &KV : LiveOuts)
847-
delete KV.second;
848-
LiveOuts.clear();
849-
850853
if (Entry) {
851854
VPValue DummyValue;
852855
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -902,6 +905,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
902905
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
903906

904907
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
908+
VPBasicBlock *ScalarHeader =
909+
VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
910+
VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
905911
if (!RequiresScalarEpilogueCheck) {
906912
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
907913
return Plan;
@@ -1051,6 +1057,8 @@ void VPlan::execute(VPTransformState *State) {
10511057
BrInst->insertBefore(MiddleBB->getTerminator());
10521058
MiddleBB->getTerminator()->eraseFromParent();
10531059
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
1060+
State->CFG.DTU.applyUpdates(
1061+
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
10541062

10551063
// Generate code in the loop pre-header and body.
10561064
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1169,12 +1177,6 @@ void VPlan::print(raw_ostream &O) const {
11691177
Block->print(O, "", SlotTracker);
11701178
}
11711179

1172-
if (!LiveOuts.empty())
1173-
O << "\n";
1174-
for (const auto &KV : LiveOuts) {
1175-
KV.second->print(O, SlotTracker);
1176-
}
1177-
11781180
O << "}\n";
11791181
}
11801182

@@ -1211,11 +1213,6 @@ LLVM_DUMP_METHOD
12111213
void VPlan::dump() const { print(dbgs()); }
12121214
#endif
12131215

1214-
void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1215-
assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1216-
LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1217-
}
1218-
12191216
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12201217
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
12211218
// Update the operands of all cloned recipes starting at NewEntry. This
@@ -1283,10 +1280,6 @@ VPlan *VPlan::duplicate() {
12831280
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
12841281
remapOperands(Entry, NewEntry, Old2NewVPValues);
12851282

1286-
// Clone live-outs.
1287-
for (const auto &[_, LO] : LiveOuts)
1288-
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1289-
12901283
// Initialize remaining fields of cloned VPlan.
12911284
NewPlan->VFs = VFs;
12921285
NewPlan->UFs = UFs;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -655,48 +655,6 @@ class VPBlockBase {
655655
virtual VPBlockBase *clone() = 0;
656656
};
657657

658-
/// A value that is used outside the VPlan. The operand of the user needs to be
659-
/// added to the associated phi node. The incoming block from VPlan is
660-
/// determined by where the VPValue is defined: if it is defined by a recipe
661-
/// outside a region, its parent block is used, otherwise the middle block is
662-
/// used.
663-
class VPLiveOut : public VPUser {
664-
PHINode *Phi;
665-
666-
public:
667-
VPLiveOut(PHINode *Phi, VPValue *Op)
668-
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
669-
670-
static inline bool classof(const VPUser *U) {
671-
return U->getVPUserID() == VPUser::VPUserID::LiveOut;
672-
}
673-
674-
/// Fix the wrapped phi node. This means adding an incoming value to exit
675-
/// block phi's from the vector loop via middle block (values from scalar loop
676-
/// already reach these phi's), and updating the value to scalar header phi's
677-
/// from the scalar preheader.
678-
void fixPhi(VPlan &Plan, VPTransformState &State);
679-
680-
/// Returns true if the VPLiveOut uses scalars of operand \p Op.
681-
bool usesScalars(const VPValue *Op) const override {
682-
assert(is_contained(operands(), Op) &&
683-
"Op must be an operand of the recipe");
684-
return true;
685-
}
686-
687-
PHINode *getPhi() const { return Phi; }
688-
689-
/// Live-outs are marked as only using the first part during the transition
690-
/// to unrolling directly on VPlan.
691-
/// TODO: Remove after unroller transition.
692-
bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }
693-
694-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
695-
/// Print the VPLiveOut to \p O.
696-
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
697-
#endif
698-
};
699-
700658
/// Struct to hold various analysis needed for cost computations.
701659
struct VPCostContext {
702660
const TargetTransformInfo &TTI;
@@ -3496,11 +3454,6 @@ class VPlan {
34963454
/// definitions are VPValues that hold a pointer to their underlying IR.
34973455
SmallVector<VPValue *, 16> VPLiveInsToFree;
34983456

3499-
/// Values used outside the plan. It contains live-outs that need fixing. Any
3500-
/// live-out that is fixed outside VPlan needs to be removed. The remaining
3501-
/// live-outs are fixed via VPLiveOut::fixPhi.
3502-
MapVector<PHINode *, VPLiveOut *> LiveOuts;
3503-
35043457
/// Mapping from SCEVs to the VPValues representing their expansions.
35053458
/// NOTE: This mapping is temporary and will be removed once all users have
35063459
/// been modeled in VPlan directly.
@@ -3680,12 +3633,6 @@ class VPlan {
36803633
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
36813634
}
36823635

3683-
void addLiveOut(PHINode *PN, VPValue *V);
3684-
3685-
const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
3686-
return LiveOuts;
3687-
}
3688-
36893636
VPValue *getSCEVExpansion(const SCEV *S) const {
36903637
return SCEVToExpansion.lookup(S);
36913638
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -202,35 +202,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
202202
}
203203
}
204204

205-
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
206-
VPValue *ExitValue = getOperand(0);
207-
VPBasicBlock *MiddleVPBB =
208-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
209-
VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
210-
auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
211-
// Values leaving the vector loop reach live out phi's in the exiting block
212-
// via middle block.
213-
auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
214-
? MiddleVPBB
215-
: ExitingVPBB;
216-
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
217-
Value *V = State.get(ExitValue, VPLane(0));
218-
if (Phi->getBasicBlockIndex(PredBB) != -1)
219-
Phi->setIncomingValueForBlock(PredBB, V);
220-
else
221-
Phi->addIncoming(V, PredBB);
222-
}
223-
224-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
225-
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
226-
O << "Live-out ";
227-
getPhi()->printAsOperand(O);
228-
O << " = ";
229-
getOperand(0)->printAsOperand(O, SlotTracker);
230-
O << "\n";
231-
}
232-
#endif
233-
234205
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
235206
assert(!Parent && "Recipe already in some VPBasicBlock");
236207
assert(InsertPos->getParent() &&
@@ -855,7 +826,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
855826
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
856827
Value *V = State.get(ExitValue, VPLane(Lane));
857828
auto *Phi = cast<PHINode>(&I);
858-
Phi->addIncoming(V, PredBB);
829+
if (Phi->getBasicBlockIndex(PredBB) == -1)
830+
Phi->addIncoming(V, PredBB);
831+
else
832+
Phi->setIncomingValueForBlock(PredBB, V);
859833
}
860834

861835
// Advance the insert point after the wrapped IR instruction. This allows

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
378378
// Don't fold the exit block of the Plan into its single predecessor for
379379
// now.
380380
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
381-
if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
381+
if (!VPBB->getParent())
382382
continue;
383383
auto *PredVPBB =
384384
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
264264
return;
265265

266266
if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
267+
VPValue *Op0, *Op1;
268+
if (match(VPI, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(Op0),
269+
m_VPValue(Op1)))) {
270+
addUniformForAllParts(VPI);
271+
return;
272+
}
273+
267274
if (vputils::onlyFirstPartUsed(VPI)) {
268275
addUniformForAllParts(VPI);
269276
return;
@@ -449,11 +456,5 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
449456
Part++;
450457
}
451458

452-
// Remap the operand of live-outs to the last part.
453-
for (const auto &[_, LO] : Plan.getLiveOuts()) {
454-
VPValue *In = Unroller.getValueForPart(LO->getOperand(0), UF - 1);
455-
LO->setOperand(0, In);
456-
}
457-
458459
VPlanTransforms::removeDeadRecipes(Plan);
459460
}

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -244,14 +244,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
244244
return false;
245245
}
246246

247-
VPBlockBase *MiddleBB =
248-
IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor();
249-
if (IRBB != IRBB->getPlan()->getPreheader() &&
250-
IRBB->getSinglePredecessor() != MiddleBB) {
251-
errs() << "VPIRBasicBlock can only be used as pre-header or a successor of "
252-
"middle-block at the moment!\n";
253-
return false;
254-
}
255247
return true;
256248
}
257249

@@ -416,12 +408,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
416408
return false;
417409
}
418410

419-
for (const auto &KV : Plan.getLiveOuts())
420-
if (KV.second->getNumOperands() != 1) {
421-
errs() << "live outs must have a single operand\n";
422-
return false;
423-
}
424-
425411
return true;
426412
}
427413

llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,6 +1077,17 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
10771077
; CHECK-NEXT: No successors
10781078
; CHECK-EMPTY:
10791079
; CHECK-NEXT: scalar.ph
1080+
; CHECK-NEXT: Successor(s): ir-bb<loop>
1081+
; CHECK-EMPTY:
1082+
; CHECK-NEXT: ir-bb<loop>:
1083+
; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ]
1084+
; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, -1
1085+
; CHECK-NEXT: IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
1086+
; CHECK-NEXT: IR %l = load i32, ptr %gep.src, align 16
1087+
; CHECK-NEXT: IR %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1
1088+
; CHECK-NEXT: IR %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
1089+
; CHECK-NEXT: IR store i32 %l, ptr %gep.dst, align 16
1090+
; CHECK-NEXT: IR %ec = icmp eq i32 %iv.next, 0
10801091
; CHECK-NEXT: No successors
10811092
; CHECK-NEXT: }
10821093
;
@@ -1156,6 +1167,13 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
11561167
; CHECK-NEXT: No successors
11571168
; CHECK-EMPTY:
11581169
; CHECK-NEXT: scalar.ph:
1170+
; CHECK-NEXT: Successor(s): ir-bb<loop.header>
1171+
; CHECK-EMPTY:
1172+
; CHECK-NEXT: ir-bb<loop.header>:
1173+
; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
1174+
; CHECK-NEXT: IR %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1
1175+
; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv.next, align 1
1176+
; CHECK-NEXT: IR %c.1 = icmp eq i8 %l, 0
11591177
; CHECK-NEXT: No successors
11601178
; CHECK-NEXT: }
11611179
;

0 commit comments

Comments
 (0)