Skip to content

Commit 8f40a32

Browse files
committed
[VPlan] Use ResumePhi to create reduction resume phis.
Use VPInstruction::ResumePhi to create phi nodes for reduction resume values. This simplifies createAndCollectMergePhiForReduction: it now only needs to run when vectorizing epilogue loops, where it collects the reduction resume phis and adds the extra incoming values from the main vector loop.
1 parent 808c498 commit 8f40a32

File tree

4 files changed

+61
-41
lines changed

4 files changed

+61
-41
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7422,8 +7422,9 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
74227422
}
74237423

74247424
// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7425-
// create a merge phi node for it and add it to \p ReductionResumeValues.
7426-
static void createAndCollectMergePhiForReduction(
7425+
// add it to \p ReductionResumeValues and add incoming values from the main
7426+
// vector loop.
7427+
static void updateAndCollectMergePhiForReductionForEpilogueVectorization(
74277428
VPInstruction *RedResult,
74287429
DenseMap<const RecurrenceDescriptor *, Value *> &ReductionResumeValues,
74297430
VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
@@ -7432,15 +7433,24 @@ static void createAndCollectMergePhiForReduction(
74327433
RedResult->getOpcode() != VPInstruction::ComputeReductionResult)
74337434
return;
74347435

7436+
using namespace VPlanPatternMatch;
7437+
VPValue *ResumePhiVPV =
7438+
cast<VPInstruction>(*find_if(RedResult->users(), [](VPUser *U) {
7439+
return match(U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(),
7440+
m_VPValue()));
7441+
}));
7442+
auto *BCBlockPhi = cast<PHINode>(State.get(ResumePhiVPV, true));
74357443
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
74367444
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
7445+
if (!VectorizingEpilogue) {
7446+
ReductionResumeValues[&RdxDesc] = BCBlockPhi;
7447+
return;
7448+
}
74377449

7438-
Value *FinalValue =
7439-
State.get(RedResult, VPIteration(0, VPLane::getFirstLane()));
74407450
auto *ResumePhi =
74417451
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
7442-
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
7443-
RdxDesc.getRecurrenceKind())) {
7452+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
7453+
RdxDesc.getRecurrenceKind())) {
74447454
auto *Cmp = cast<ICmpInst>(PhiR->getStartValue()->getUnderlyingValue());
74457455
assert(Cmp->getPredicate() == CmpInst::ICMP_NE);
74467456
assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue());
@@ -7450,42 +7460,15 @@ static void createAndCollectMergePhiForReduction(
74507460
"when vectorizing the epilogue loop, we need a resume phi from main "
74517461
"vector loop");
74527462

7453-
// TODO: bc.merge.rdx should not be created here, instead it should be
7454-
// modeled in VPlan.
74557463
BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader();
7456-
// Create a phi node that merges control-flow from the backedge-taken check
7457-
// block and the middle block.
7458-
auto *BCBlockPhi =
7459-
PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx",
7460-
LoopScalarPreHeader->getTerminator()->getIterator());
7461-
74627464
// If we are fixing reductions in the epilogue loop then we should already
74637465
// have created a bc.merge.rdx Phi after the main vector body. Ensure that
74647466
// we carry over the incoming values correctly.
74657467
for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
7466-
if (Incoming == LoopMiddleBlock)
7467-
BCBlockPhi->addIncoming(FinalValue, Incoming);
7468-
else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
7469-
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
7470-
Incoming);
7471-
else
7472-
BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
7468+
if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
7469+
BCBlockPhi->setIncomingValueForBlock(
7470+
Incoming, ResumePhi->getIncomingValueForBlock(Incoming));
74737471
}
7474-
7475-
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
7476-
// TODO: This fixup should instead be modeled in VPlan.
7477-
// Fix the scalar loop reduction variable with the incoming reduction sum
7478-
// from the vector body and from the backedge value.
7479-
int IncomingEdgeBlockIdx =
7480-
OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch());
7481-
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
7482-
// Pick the other block.
7483-
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
7484-
OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
7485-
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
7486-
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
7487-
7488-
ReductionResumeValues[&RdxDesc] = BCBlockPhi;
74897472
}
74907473

74917474
std::pair<DenseMap<const SCEV *, Value *>,
@@ -7579,11 +7562,12 @@ LoopVectorizationPlanner::executePlan(
75797562
DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
75807563
auto *ExitVPBB =
75817564
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7582-
for (VPRecipeBase &R : *ExitVPBB) {
7583-
createAndCollectMergePhiForReduction(
7584-
dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7585-
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7586-
}
7565+
if (IsEpilogueVectorization)
7566+
for (VPRecipeBase &R : *ExitVPBB) {
7567+
updateAndCollectMergePhiForReductionForEpilogueVectorization(
7568+
dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7569+
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7570+
}
75877571

75887572
// 2.6. Maintain Loop Hints
75897573
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -9369,6 +9353,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93699353
m_VPValue()));
93709354
});
93719355

9356+
VPBasicBlock *ScalarPHVPBB = nullptr;
9357+
if (MiddleVPBB->getNumSuccessors() == 2) {
9358+
// Order is strict: first is the exit block, second is the scalar
9359+
// preheader.
9360+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
9361+
} else {
9362+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
9363+
}
9364+
9365+
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
9366+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
9367+
VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},
9368+
{}, "bc.merge.rdx");
9369+
auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
9370+
Plan->addLiveOut(RedPhi, ResumePhiRecipe);
9371+
93729372
// Adjust AnyOf reductions; replace the reduction phi for the selected value
93739373
// with a boolean reduction phi node to check if the condition is true in
93749374
// any iteration. The final value is selected by the final

llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
6565
; IF-EVL-INLOOP-NEXT: No successors
6666
; IF-EVL-INLOOP-EMPTY:
6767
; IF-EVL-INLOOP-NEXT: scalar.ph:
68+
; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
6869
; IF-EVL-INLOOP-NEXT: No successors
70+
; IF-EVL-INLOOP-EMPTY:
71+
; IF-EVL-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
6972
; IF-EVL-INLOOP-NEXT: }
7073
;
7174

@@ -104,7 +107,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
104107
; NO-VP-OUTLOOP-NEXT: No successors
105108
; NO-VP-OUTLOOP-EMPTY:
106109
; NO-VP-OUTLOOP-NEXT: scalar.ph:
110+
; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
107111
; NO-VP-OUTLOOP-NEXT: No successors
112+
; NO-VP-OUTLOOP-EMPTY:
113+
; NO-VP-OUTLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
108114
; NO-VP-OUTLOOP-NEXT: }
109115
;
110116

@@ -143,7 +149,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
143149
; NO-VP-INLOOP-NEXT: No successors
144150
; NO-VP-INLOOP-EMPTY:
145151
; NO-VP-INLOOP-NEXT: scalar.ph:
152+
; NO-VP-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
146153
; NO-VP-INLOOP-NEXT: No successors
154+
; NO-VP-INLOOP-EMPTY:
155+
; NO-VP-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
147156
; NO-VP-INLOOP-NEXT: }
148157
;
149158
entry:

llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
232232
; CHECK-EMPTY:
233233
; CHECK-NEXT: scalar.ph
234234
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
235+
; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234>
235236
; CHECK-NEXT: No successors
236237
; CHECK-EMPTY:
237238
; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
239+
; CHECK-NEXT: Live-out i32 %and.red = vp<[[RESUME_RED]]>
238240
; CHECK-NEXT: }
239241
;
240242
entry:

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,10 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
165165
; CHECK-NEXT: No successors
166166
; CHECK-EMPTY:
167167
; CHECK-NEXT: scalar.ph
168+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
168169
; CHECK-NEXT: No successors
170+
; CHECK-EMPTY:
171+
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
169172
; CHECK-NEXT: }
170173
;
171174
entry:
@@ -220,7 +223,10 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
220223
; CHECK-NEXT: No successors
221224
; CHECK-EMPTY:
222225
; CHECK-NEXT: scalar.ph
226+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
223227
; CHECK-NEXT: No successors
228+
; CHECK-EMPTY:
229+
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
224230
; CHECK-NEXT: }
225231
;
226232
entry:
@@ -447,7 +453,10 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
447453
; CHECK-NEXT: No successors
448454
; CHECK-EMPTY:
449455
; CHECK-NEXT: scalar.ph
456+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
450457
; CHECK-NEXT: No successors
458+
; CHECK-EMPTY:
459+
; CHECK-NEXT: Live-out float %sum.07 = vp<[[RED_RESUME]]>
451460
; CHECK-NEXT:}
452461

453462
entry:

0 commit comments

Comments
 (0)