Skip to content

Commit f0c5caa

Browse files
authored
[VPlan] Add VPIRInstruction, use for exit block live-outs. (#100735)
Add a new VPIRInstruction recipe to wrap existing IR instructions not to be modified during execution, except for PHIs. For PHIs, a single VPValue operand is allowed, and it is used to add a new incoming value for the single predecessor VPBB. Except for PHIs, VPIRInstructions cannot have any operands. Depends on #100658. PR: #100735
1 parent a578558 commit f0c5caa

13 files changed

+166
-67
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 46 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8630,11 +8630,12 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
86308630
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
86318631
}
86328632

8633-
// Collect (ExitPhi, ExitingValue) pairs phis in the original exit block that
8634-
// are modeled in VPlan. Some exiting values are not modeled explicitly yet and
8635-
// won't be included. Those are un-truncated VPWidenIntOrFpInductionRecipe,
8636-
// VPWidenPointerInductionRecipe and induction increments.
8637-
static MapVector<PHINode *, VPValue *> collectUsersInExitBlock(
8633+
// Collect VPIRInstructions for phis in the original exit block that are modeled
8634+
// in VPlan and add the exiting VPValue as operand. Some exiting values are not
8635+
// modeled explicitly yet and won't be included. Those are un-truncated
8636+
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
8637+
// increments.
8638+
static SetVector<VPIRInstruction *> collectUsersInExitBlock(
86388639
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
86398640
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
86408641
auto *MiddleVPBB =
@@ -8644,13 +8645,17 @@ static MapVector<PHINode *, VPValue *> collectUsersInExitBlock(
86448645
// from scalar loop only.
86458646
if (MiddleVPBB->getNumSuccessors() != 2)
86468647
return {};
8647-
MapVector<PHINode *, VPValue *> ExitingValuesToFix;
8648-
BasicBlock *ExitBB =
8649-
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock();
8648+
SetVector<VPIRInstruction *> ExitUsersToFix;
8649+
VPBasicBlock *ExitVPBB = cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0]);
86508650
BasicBlock *ExitingBB = OrigLoop->getExitingBlock();
8651-
for (PHINode &ExitPhi : ExitBB->phis()) {
8652-
Value *IncomingValue =
8653-
ExitPhi.getIncomingValueForBlock(ExitingBB);
8651+
for (VPRecipeBase &R : *ExitVPBB) {
8652+
auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
8653+
if (!ExitIRI)
8654+
continue;
8655+
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
8656+
if (!ExitPhi)
8657+
break;
8658+
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
86548659
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
86558660
// Exit values for inductions are computed and updated outside of VPlan and
86568661
// independent of induction recipes.
@@ -8666,17 +8671,18 @@ static MapVector<PHINode *, VPValue *> collectUsersInExitBlock(
86668671
return P && Inductions.contains(P);
86678672
})))
86688673
continue;
8669-
ExitingValuesToFix.insert({&ExitPhi, V});
8674+
ExitUsersToFix.insert(ExitIRI);
8675+
ExitIRI->addOperand(V);
86708676
}
8671-
return ExitingValuesToFix;
8677+
return ExitUsersToFix;
86728678
}
86738679

8674-
// Add exit values to \p Plan. Extracts and VPLiveOuts are added for each entry
8675-
// in \p ExitingValuesToFix.
8680+
// Add exit values to \p Plan. Extracts are added for each entry in \p
8681+
// ExitUsersToFix if needed and their operands are updated.
86768682
static void
86778683
addUsersInExitBlock(VPlan &Plan,
8678-
MapVector<PHINode *, VPValue *> &ExitingValuesToFix) {
8679-
if (ExitingValuesToFix.empty())
8684+
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8685+
if (ExitUsersToFix.empty())
86808686
return;
86818687

86828688
auto *MiddleVPBB =
@@ -8685,18 +8691,19 @@ addUsersInExitBlock(VPlan &Plan,
86858691
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock();
86868692
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
86878693

8688-
// Introduce VPUsers modeling the exit values.
8689-
for (const auto &[ExitPhi, V] : ExitingValuesToFix) {
8694+
// Introduce extract for exiting values and update the VPIRInstructions
8695+
// modeling the corresponding LCSSA phis.
8696+
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
8697+
VPValue *V = ExitIRI->getOperand(0);
86908698
// Pass live-in values used by exit phis directly through to the live-out.
8691-
if (V->isLiveIn()) {
8692-
Plan.addLiveOut(ExitPhi, V);
8699+
if (V->isLiveIn())
86938700
continue;
8694-
}
8701+
86958702
VPValue *Ext = B.createNaryOp(
86968703
VPInstruction::ExtractFromEnd,
86978704
{V, Plan.getOrAddLiveIn(ConstantInt::get(
86988705
IntegerType::get(ExitBB->getContext(), 32), 1))});
8699-
Plan.addLiveOut(ExitPhi, Ext);
8706+
ExitIRI->setOperand(0, Ext);
87008707
}
87018708
}
87028709

@@ -8709,7 +8716,7 @@ addUsersInExitBlock(VPlan &Plan,
87098716
/// 2. Feed the penultimate value of recurrences to their LCSSA phi users in
87108717
/// the original exit block using a VPLiveOut.
87118718
static void addLiveOutsForFirstOrderRecurrences(
8712-
VPlan &Plan, MapVector<PHINode *, VPValue *> &ExitingValuesToFix) {
8719+
VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix) {
87138720
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
87148721

87158722
// Start by finding out if middle block branches to scalar preheader, which is
@@ -8726,14 +8733,14 @@ static void addLiveOutsForFirstOrderRecurrences(
87268733
ExitBB =
87278734
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock();
87288735
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
8729-
} else if (ExitingValuesToFix.empty()) {
8736+
} else if (ExitUsersToFix.empty()) {
87308737
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
87318738
} else {
87328739
ExitBB = cast<VPIRBasicBlock>(MiddleVPBB->getSingleSuccessor())
87338740
->getIRBasicBlock();
87348741
}
87358742
if (!ScalarPHVPBB) {
8736-
assert(ExitingValuesToFix.empty() &&
8743+
assert(ExitUsersToFix.empty() &&
87378744
"missed inserting extracts for exiting values");
87388745
return;
87398746
}
@@ -8827,24 +8834,17 @@ static void addLiveOutsForFirstOrderRecurrences(
88278834
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
88288835
Plan.addLiveOut(FORPhi, ResumePhiRecipe);
88298836

8830-
// Now create VPLiveOuts for users in the exit block.
8831-
// Extract the penultimate value of the recurrence and add VPLiveOut
8832-
// users of the recurrence splice.
8833-
8834-
// No edge from the middle block to the unique exit block has been inserted
8835-
// and there is nothing to fix from vector loop; phis should have incoming
8836-
// from scalar loop only.
8837-
if (ExitingValuesToFix.empty())
8838-
continue;
8839-
for (User *U : FORPhi->users()) {
8840-
auto *UI = cast<Instruction>(U);
8841-
if (UI->getParent() != ExitBB)
8837+
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
8838+
// Extract the penultimate value of the recurrence and use it as operand for
8839+
// the VPIRInstruction modeling the phi.
8840+
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
8841+
if (ExitIRI->getOperand(0) != FOR)
88428842
continue;
88438843
VPValue *Ext = MiddleBuilder.createNaryOp(
88448844
VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {},
88458845
"vector.recur.extract.for.phi");
8846-
Plan.addLiveOut(cast<PHINode>(UI), Ext);
8847-
ExitingValuesToFix.erase(cast<PHINode>(UI));
8846+
ExitIRI->setOperand(0, Ext);
8847+
ExitUsersToFix.remove(ExitIRI);
88488848
}
88498849
}
88508850
}
@@ -9006,11 +9006,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
90069006
"VPBasicBlock");
90079007
RecipeBuilder.fixHeaderPhis();
90089008

9009-
MapVector<PHINode *, VPValue *> ExitingValuesToFix = collectUsersInExitBlock(
9009+
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
90109010
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9011-
9012-
addLiveOutsForFirstOrderRecurrences(*Plan, ExitingValuesToFix);
9013-
addUsersInExitBlock(*Plan, ExitingValuesToFix);
9011+
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9012+
addUsersInExitBlock(*Plan, ExitUsersToFix);
90149013

90159014
// ---------------------------------------------------------------------------
90169015
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -10128,7 +10127,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1012810127
// directly in VPlan.
1012910128
EpilogILV.setTripCount(MainILV.getTripCount());
1013010129
for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) {
10131-
auto *ExpandR = cast<VPExpandSCEVRecipe>(&R);
10130+
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
10131+
if (!ExpandR)
10132+
continue;
1013210133
auto *ExpandedVal = BestEpiPlan.getOrAddLiveIn(
1013310134
ExpandedSCEVs.find(ExpandR->getSCEV())->second);
1013410135
ExpandR->replaceAllUsesWith(ExpandedVal);

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -860,10 +860,18 @@ VPlan::~VPlan() {
860860
delete BackedgeTakenCount;
861861
}
862862

863+
static VPIRBasicBlock *createVPIRBasicBlockFor(BasicBlock *BB) {
864+
auto *VPIRBB = new VPIRBasicBlock(BB);
865+
for (Instruction &I :
866+
make_range(BB->begin(), BB->getTerminator()->getIterator()))
867+
VPIRBB->appendRecipe(new VPIRInstruction(I));
868+
return VPIRBB;
869+
}
870+
863871
VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE,
864872
bool RequiresScalarEpilogueCheck,
865873
bool TailFolded, Loop *TheLoop) {
866-
VPIRBasicBlock *Entry = new VPIRBasicBlock(TheLoop->getLoopPreheader());
874+
VPIRBasicBlock *Entry = createVPIRBasicBlockFor(TheLoop->getLoopPreheader());
867875
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
868876
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
869877
Plan->TripCount =
@@ -895,7 +903,7 @@ VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE,
895903
// we unconditionally branch to the scalar preheader. Do nothing.
896904
// 3) Otherwise, construct a runtime check.
897905
BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock();
898-
auto *VPExitBlock = new VPIRBasicBlock(IRExitBlock);
906+
auto *VPExitBlock = createVPIRBasicBlockFor(IRExitBlock);
899907
// The connection order corresponds to the operands of the conditional branch.
900908
VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB);
901909
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
@@ -972,7 +980,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
972980
/// predecessor, which is rewired to the new VPIRBasicBlock. All successors of
973981
/// VPBB, if any, are rewired to the new VPIRBasicBlock.
974982
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
975-
VPIRBasicBlock *IRMiddleVPBB = new VPIRBasicBlock(IRBB);
983+
VPIRBasicBlock *IRMiddleVPBB = createVPIRBasicBlockFor(IRBB);
976984
for (auto &R : make_early_inc_range(*VPBB))
977985
R.moveBefore(*IRMiddleVPBB, IRMiddleVPBB->end());
978986
VPBlockBase *PredVPBB = VPBB->getSinglePredecessor();

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -936,8 +936,9 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
936936
case VPRecipeBase::VPReductionPHISC:
937937
case VPRecipeBase::VPScalarCastSC:
938938
return true;
939-
case VPRecipeBase::VPInterleaveSC:
940939
case VPRecipeBase::VPBranchOnMaskSC:
940+
case VPRecipeBase::VPInterleaveSC:
941+
case VPRecipeBase::VPIRInstructionSC:
941942
case VPRecipeBase::VPWidenLoadEVLSC:
942943
case VPRecipeBase::VPWidenLoadSC:
943944
case VPRecipeBase::VPWidenStoreEVLSC:
@@ -1405,6 +1406,45 @@ class VPInstruction : public VPRecipeWithIRFlags {
14051406
bool isSingleScalar() const;
14061407
};
14071408

1409+
/// A recipe to wrap an original IR instruction not to be modified during
1410+
/// execution, except for PHIs. For PHIs, a single VPValue operand is allowed,
1411+
/// and it is used to add a new incoming value for the single predecessor VPBB.
1412+
/// Except for PHIs, VPIRInstructions cannot have any operands.
1413+
class VPIRInstruction : public VPRecipeBase {
1414+
Instruction &I;
1415+
1416+
public:
1417+
VPIRInstruction(Instruction &I)
1418+
: VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1419+
1420+
~VPIRInstruction() override = default;
1421+
1422+
VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1423+
1424+
VPIRInstruction *clone() override {
1425+
auto *R = new VPIRInstruction(I);
1426+
for (auto *Op : operands())
1427+
R->addOperand(Op);
1428+
return R;
1429+
}
1430+
1431+
void execute(VPTransformState &State) override;
1432+
1433+
Instruction &getInstruction() { return I; }
1434+
1435+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1436+
/// Print the recipe.
1437+
void print(raw_ostream &O, const Twine &Indent,
1438+
VPSlotTracker &SlotTracker) const override;
1439+
#endif
1440+
1441+
bool usesScalars(const VPValue *Op) const override {
1442+
assert(is_contained(operands(), Op) &&
1443+
"Op must be an operand of the recipe");
1444+
return true;
1445+
}
1446+
};
1447+
14081448
/// VPWidenRecipe is a recipe for producing a widened instruction using the
14091449
/// opcode and operands of the recipe. This recipe covers most of the
14101450
/// traditional vectorization cases where each recipe transforms into a

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -867,6 +867,43 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
867867
}
868868
#endif
869869

870+
void VPIRInstruction::execute(VPTransformState &State) {
871+
assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
872+
"Only PHINodes can have extra operands");
873+
if (getNumOperands() == 1) {
874+
VPValue *ExitValue = getOperand(0);
875+
auto Lane = vputils::isUniformAfterVectorization(ExitValue)
876+
? VPLane::getFirstLane()
877+
: VPLane::getLastLaneForVF(State.VF);
878+
auto *PredVPBB = cast<VPBasicBlock>(getParent()->getSinglePredecessor());
879+
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
880+
// Set insertion point in PredBB in case an extract needs to be generated.
881+
// TODO: Model extracts explicitly.
882+
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
883+
Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane));
884+
auto *Phi = cast<PHINode>(&I);
885+
Phi->addIncoming(V, PredBB);
886+
}
887+
888+
// Advance the insert point after the wrapped IR instruction. This allows
889+
// interleaving VPIRInstructions and other recipes.
890+
State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
891+
}
892+
893+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
894+
void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
895+
VPSlotTracker &SlotTracker) const {
896+
O << Indent << "IR " << I;
897+
898+
if (getNumOperands() != 0) {
899+
assert(getNumOperands() == 1 && "can have at most 1 operand");
900+
O << " (extra operand: ";
901+
printOperands(O, SlotTracker);
902+
O << ")";
903+
}
904+
}
905+
#endif
906+
870907
void VPWidenCallRecipe::execute(VPTransformState &State) {
871908
assert(State.VF.isVector() && "not widening");
872909
Function *CalledScalarFn = getCalledScalarFunction();

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ class VPDef {
339339
VPBranchOnMaskSC,
340340
VPDerivedIVSC,
341341
VPExpandSCEVSC,
342+
VPIRInstructionSC,
342343
VPInstructionSC,
343344
VPInterleaveSC,
344345
VPReductionEVLSC,

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,15 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
126126
RecipeNumbering[&R] = Cnt++;
127127

128128
for (const VPRecipeBase &R : *VPBB) {
129+
if (isa<VPIRInstruction>(&R) ^ isa<VPIRBasicBlock>(VPBB)) {
130+
errs() << "VPIRInstructions ";
131+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
132+
R.dump();
133+
errs() << " ";
134+
#endif
135+
errs() << "not in a VPIRBasicBlock!\n";
136+
return false;
137+
}
129138
for (const VPValue *V : R.definedValues()) {
130139
for (const VPUser *U : V->users()) {
131140
auto *UI = dyn_cast<VPRecipeBase>(U);

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
5858
; CHECK-NEXT: vp<%2> = original trip-count
5959
; CHECK-EMPTY:
6060
; CHECK-NEXT: ir-bb<for.body.preheader>:
61+
; CHECK-NEXT: IR %0 = zext i32 %n to i64
6162
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64)
6263
; CHECK-NEXT: No successors
6364
; CHECK-EMPTY:
@@ -141,6 +142,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
141142
; CHECK-NEXT: vp<%2> = original trip-count
142143
; CHECK-EMPTY:
143144
; CHECK-NEXT: ir-bb<for.body.preheader>:
145+
; CHECK-NEXT: IR %0 = zext i32 %n to i64
144146
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64)
145147
; CHECK-NEXT: No successors
146148
; CHECK-EMPTY:
@@ -260,6 +262,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
260262
; CHECK-NEXT: vp<%2> = original trip-count
261263
; CHECK-EMPTY:
262264
; CHECK-NEXT: ir-bb<for.body.preheader>:
265+
; CHECK-NEXT: IR %0 = zext i32 %n to i64
263266
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64)
264267
; CHECK-NEXT: No successors
265268
; CHECK-EMPTY:
@@ -343,6 +346,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
343346
; CHECK-NEXT: vp<%2> = original trip-count
344347
; CHECK-EMPTY:
345348
; CHECK-NEXT: ir-bb<for.body.preheader>:
349+
; CHECK-NEXT: IR %0 = zext i32 %n to i64
346350
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64)
347351
; CHECK-NEXT: No successors
348352
; CHECK-EMPTY:

0 commit comments

Comments
 (0)