Skip to content

Commit aefe89b

Browse files
committed
[VPlan] Use ResumePhi to create reduction resume phis.
Use VPInstruction::ResumePhi to create phi nodes for reduction resume values. This allows simplifying createAndCollectMergePhiForReduction so that it only collects reduction resume phis when vectorizing epilogue loops and adds the extra incoming edges from the main vector loop.
1 parent 2b125e8 commit aefe89b

File tree

4 files changed

+61
-40
lines changed

4 files changed

+61
-40
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 41 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -7423,8 +7423,9 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
74237423
}
74247424

74257425
// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7426-
// create a merge phi node for it and add it to \p ReductionResumeValues.
7427-
static void createAndCollectMergePhiForReduction(
7426+
// add it to \p ReductionResumeValues and add incoming values from the main
7427+
// vector loop.
7428+
static void updateAndCollectMergePhiForReductionForEpilogueVectorization(
74287429
VPInstruction *RedResult,
74297430
DenseMap<const RecurrenceDescriptor *, Value *> &ReductionResumeValues,
74307431
VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
@@ -7433,14 +7434,24 @@ static void createAndCollectMergePhiForReduction(
74337434
RedResult->getOpcode() != VPInstruction::ComputeReductionResult)
74347435
return;
74357436

7437+
using namespace VPlanPatternMatch;
7438+
VPValue *ResumePhiVPV =
7439+
cast<VPInstruction>(*find_if(RedResult->users(), [](VPUser *U) {
7440+
return match(U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(),
7441+
m_VPValue()));
7442+
}));
7443+
auto *BCBlockPhi = cast<PHINode>(State.get(ResumePhiVPV, true));
74367444
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
74377445
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
7446+
if (!VectorizingEpilogue) {
7447+
ReductionResumeValues[&RdxDesc] = BCBlockPhi;
7448+
return;
7449+
}
74387450

7439-
Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane()));
74407451
auto *ResumePhi =
74417452
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
7442-
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
7443-
RdxDesc.getRecurrenceKind())) {
7453+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
7454+
RdxDesc.getRecurrenceKind())) {
74447455
auto *Cmp = cast<ICmpInst>(PhiR->getStartValue()->getUnderlyingValue());
74457456
assert(Cmp->getPredicate() == CmpInst::ICMP_NE);
74467457
assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue());
@@ -7450,42 +7461,15 @@ static void createAndCollectMergePhiForReduction(
74507461
"when vectorizing the epilogue loop, we need a resume phi from main "
74517462
"vector loop");
74527463

7453-
// TODO: bc.merge.rdx should not be created here, instead it should be
7454-
// modeled in VPlan.
74557464
BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader();
7456-
// Create a phi node that merges control-flow from the backedge-taken check
7457-
// block and the middle block.
7458-
auto *BCBlockPhi =
7459-
PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx",
7460-
LoopScalarPreHeader->getTerminator()->getIterator());
7461-
74627465
// If we are fixing reductions in the epilogue loop then we should already
74637466
// have created a bc.merge.rdx Phi after the main vector body. Ensure that
74647467
// we carry over the incoming values correctly.
74657468
for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
7466-
if (Incoming == LoopMiddleBlock)
7467-
BCBlockPhi->addIncoming(FinalValue, Incoming);
7468-
else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
7469-
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
7470-
Incoming);
7471-
else
7472-
BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
7469+
if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
7470+
BCBlockPhi->setIncomingValueForBlock(
7471+
Incoming, ResumePhi->getIncomingValueForBlock(Incoming));
74737472
}
7474-
7475-
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
7476-
// TODO: This fixup should instead be modeled in VPlan.
7477-
// Fix the scalar loop reduction variable with the incoming reduction sum
7478-
// from the vector body and from the backedge value.
7479-
int IncomingEdgeBlockIdx =
7480-
OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch());
7481-
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
7482-
// Pick the other block.
7483-
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
7484-
OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
7485-
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
7486-
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
7487-
7488-
ReductionResumeValues[&RdxDesc] = BCBlockPhi;
74897473
}
74907474

74917475
std::pair<DenseMap<const SCEV *, Value *>,
@@ -7579,11 +7563,12 @@ LoopVectorizationPlanner::executePlan(
75797563
DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
75807564
auto *ExitVPBB =
75817565
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7582-
for (VPRecipeBase &R : *ExitVPBB) {
7583-
createAndCollectMergePhiForReduction(
7584-
dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7585-
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7586-
}
7566+
if (IsEpilogueVectorization)
7567+
for (VPRecipeBase &R : *ExitVPBB) {
7568+
updateAndCollectMergePhiForReductionForEpilogueVectorization(
7569+
dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7570+
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7571+
}
75877572

75887573
// 2.6. Maintain Loop Hints
75897574
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -9369,6 +9354,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93699354
m_VPValue()));
93709355
});
93719356

9357+
VPBasicBlock *ScalarPHVPBB = nullptr;
9358+
if (MiddleVPBB->getNumSuccessors() == 2) {
9359+
// Order is strict: first is the exit block, second is the scalar
9360+
// preheader.
9361+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
9362+
} else {
9363+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
9364+
}
9365+
9366+
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
9367+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
9368+
VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},
9369+
{}, "bc.merge.rdx");
9370+
auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
9371+
Plan->addLiveOut(RedPhi, ResumePhiRecipe);
9372+
93729373
// Adjust AnyOf reductions; replace the reduction phi for the selected value
93739374
// with a boolean reduction phi node to check if the condition is true in
93749375
// any iteration. The final value is selected by the final

llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
6565
; IF-EVL-INLOOP-NEXT: No successors
6666
; IF-EVL-INLOOP-EMPTY:
6767
; IF-EVL-INLOOP-NEXT: scalar.ph:
68+
; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
6869
; IF-EVL-INLOOP-NEXT: No successors
70+
; IF-EVL-INLOOP-EMPTY:
71+
; IF-EVL-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
6972
; IF-EVL-INLOOP-NEXT: }
7073
;
7174

@@ -104,7 +107,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
104107
; NO-VP-OUTLOOP-NEXT: No successors
105108
; NO-VP-OUTLOOP-EMPTY:
106109
; NO-VP-OUTLOOP-NEXT: scalar.ph:
110+
; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
107111
; NO-VP-OUTLOOP-NEXT: No successors
112+
; NO-VP-OUTLOOP-EMPTY:
113+
; NO-VP-OUTLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
108114
; NO-VP-OUTLOOP-NEXT: }
109115
;
110116

@@ -143,7 +149,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
143149
; NO-VP-INLOOP-NEXT: No successors
144150
; NO-VP-INLOOP-EMPTY:
145151
; NO-VP-INLOOP-NEXT: scalar.ph:
152+
; NO-VP-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
146153
; NO-VP-INLOOP-NEXT: No successors
154+
; NO-VP-INLOOP-EMPTY:
155+
; NO-VP-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
147156
; NO-VP-INLOOP-NEXT: }
148157
;
149158
entry:

llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
232232
; CHECK-EMPTY:
233233
; CHECK-NEXT: scalar.ph
234234
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
235+
; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234>
235236
; CHECK-NEXT: No successors
236237
; CHECK-EMPTY:
237238
; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
239+
; CHECK-NEXT: Live-out i32 %and.red = vp<[[RESUME_RED]]>
238240
; CHECK-NEXT: }
239241
;
240242
entry:

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,10 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
165165
; CHECK-NEXT: No successors
166166
; CHECK-EMPTY:
167167
; CHECK-NEXT: scalar.ph
168+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
168169
; CHECK-NEXT: No successors
170+
; CHECK-EMPTY:
171+
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
169172
; CHECK-NEXT: }
170173
;
171174
entry:
@@ -220,7 +223,10 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
220223
; CHECK-NEXT: No successors
221224
; CHECK-EMPTY:
222225
; CHECK-NEXT: scalar.ph
226+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
223227
; CHECK-NEXT: No successors
228+
; CHECK-EMPTY:
229+
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
224230
; CHECK-NEXT: }
225231
;
226232
entry:
@@ -447,7 +453,10 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
447453
; CHECK-NEXT: No successors
448454
; CHECK-EMPTY:
449455
; CHECK-NEXT: scalar.ph
456+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
450457
; CHECK-NEXT: No successors
458+
; CHECK-EMPTY:
459+
; CHECK-NEXT: Live-out float %sum.07 = vp<[[RED_RESUME]]>
451460
; CHECK-NEXT:}
452461

453462
entry:

0 commit comments

Comments
 (0)