Skip to content

Commit 10cc688

Browse files
committed
[VPlan] Use ResumePhi to create reduction resume phis.
Use VPInstruction::ResumePhi to create the phi nodes for reduction resume values. This simplifies createAndCollectMergePhiForReduction (renamed to updateAndCollectMergePhiForReductionForEpilogueVectorization): it now only collects the reduction resume phis when vectorizing epilogue loops and adds the extra incoming edges from the main vector loop.
1 parent ee57a68 commit 10cc688

File tree

4 files changed

+61
-41
lines changed

4 files changed

+61
-41
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7435,23 +7435,31 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
74357435
}
74367436

74377437
// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7438-
// create a merge phi node for it.
7439-
static void createAndCollectMergePhiForReduction(
7440-
VPInstruction *RedResult,
7441-
VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
7442-
bool VectorizingEpilogue) {
7438+
// create a merge phi node for it and add incoming values from the main vector
7439+
// loop.
7440+
static void updateAndCollectMergePhiForReductionForEpilogueVectorization(
7441+
VPInstruction *RedResult, VPTransformState &State, Loop *OrigLoop,
7442+
BasicBlock *LoopMiddleBlock, bool VectorizingEpilogue) {
74437443
if (!RedResult ||
74447444
RedResult->getOpcode() != VPInstruction::ComputeReductionResult)
74457445
return;
74467446

7447+
using namespace VPlanPatternMatch;
7448+
VPValue *ResumePhiVPV =
7449+
cast<VPInstruction>(*find_if(RedResult->users(), [](VPUser *U) {
7450+
return match(U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(),
7451+
m_VPValue()));
7452+
}));
7453+
auto *BCBlockPhi = cast<PHINode>(State.get(ResumePhiVPV, true));
74477454
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
74487455
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
7456+
if (!VectorizingEpilogue)
7457+
return;
74497458

7450-
Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane()));
74517459
auto *ResumePhi =
74527460
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
7453-
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
7454-
RdxDesc.getRecurrenceKind())) {
7461+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
7462+
RdxDesc.getRecurrenceKind())) {
74557463
auto *Cmp = cast<ICmpInst>(PhiR->getStartValue()->getUnderlyingValue());
74567464
assert(Cmp->getPredicate() == CmpInst::ICMP_NE);
74577465
assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue());
@@ -7461,40 +7469,15 @@ static void createAndCollectMergePhiForReduction(
74617469
"when vectorizing the epilogue loop, we need a resume phi from main "
74627470
"vector loop");
74637471

7464-
// TODO: bc.merge.rdx should not be created here, instead it should be
7465-
// modeled in VPlan.
74667472
BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader();
7467-
// Create a phi node that merges control-flow from the backedge-taken check
7468-
// block and the middle block.
7469-
auto *BCBlockPhi =
7470-
PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx",
7471-
LoopScalarPreHeader->getTerminator()->getIterator());
7472-
74737473
// If we are fixing reductions in the epilogue loop then we should already
74747474
// have created a bc.merge.rdx Phi after the main vector body. Ensure that
74757475
// we carry over the incoming values correctly.
74767476
for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
7477-
if (Incoming == LoopMiddleBlock)
7478-
BCBlockPhi->addIncoming(FinalValue, Incoming);
7479-
else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
7480-
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
7481-
Incoming);
7482-
else
7483-
BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
7477+
if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
7478+
BCBlockPhi->setIncomingValueForBlock(
7479+
Incoming, ResumePhi->getIncomingValueForBlock(Incoming));
74847480
}
7485-
7486-
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
7487-
// TODO: This fixup should instead be modeled in VPlan.
7488-
// Fix the scalar loop reduction variable with the incoming reduction sum
7489-
// from the vector body and from the backedge value.
7490-
int IncomingEdgeBlockIdx =
7491-
OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch());
7492-
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
7493-
// Pick the other block.
7494-
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
7495-
OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
7496-
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
7497-
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
74987481
}
74997482

75007483
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
@@ -7585,11 +7568,12 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
75857568
// 2.5 Collect reduction resume values.
75867569
auto *ExitVPBB =
75877570
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7588-
for (VPRecipeBase &R : *ExitVPBB) {
7589-
createAndCollectMergePhiForReduction(
7590-
dyn_cast<VPInstruction>(&R), State, OrigLoop,
7591-
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7592-
}
7571+
if (IsEpilogueVectorization)
7572+
for (VPRecipeBase &R : *ExitVPBB) {
7573+
updateAndCollectMergePhiForReductionForEpilogueVectorization(
7574+
dyn_cast<VPInstruction>(&R), State, OrigLoop,
7575+
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7576+
}
75937577

75947578
// 2.6. Maintain Loop Hints
75957579
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -9383,6 +9367,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93839367
});
93849368
FinalReductionResult->insertBefore(*MiddleVPBB, IP);
93859369

9370+
VPBasicBlock *ScalarPHVPBB = nullptr;
9371+
if (MiddleVPBB->getNumSuccessors() == 2) {
9372+
// Order is strict: first is the exit block, second is the scalar
9373+
// preheader.
9374+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
9375+
} else {
9376+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
9377+
}
9378+
9379+
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
9380+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
9381+
VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},
9382+
{}, "bc.merge.rdx");
9383+
auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
9384+
Plan->addLiveOut(RedPhi, ResumePhiRecipe);
9385+
93869386
// Adjust AnyOf reductions; replace the reduction phi for the selected value
93879387
// with a boolean reduction phi node to check if the condition is true in
93889388
// any iteration. The final value is selected by the final

llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
6565
; IF-EVL-INLOOP-NEXT: No successors
6666
; IF-EVL-INLOOP-EMPTY:
6767
; IF-EVL-INLOOP-NEXT: scalar.ph:
68+
; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
6869
; IF-EVL-INLOOP-NEXT: No successors
70+
; IF-EVL-INLOOP-EMPTY:
71+
; IF-EVL-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
6972
; IF-EVL-INLOOP-NEXT: }
7073
;
7174

@@ -104,7 +107,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
104107
; NO-VP-OUTLOOP-NEXT: No successors
105108
; NO-VP-OUTLOOP-EMPTY:
106109
; NO-VP-OUTLOOP-NEXT: scalar.ph:
110+
; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
107111
; NO-VP-OUTLOOP-NEXT: No successors
112+
; NO-VP-OUTLOOP-EMPTY:
113+
; NO-VP-OUTLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
108114
; NO-VP-OUTLOOP-NEXT: }
109115
;
110116

@@ -143,7 +149,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
143149
; NO-VP-INLOOP-NEXT: No successors
144150
; NO-VP-INLOOP-EMPTY:
145151
; NO-VP-INLOOP-NEXT: scalar.ph:
152+
; NO-VP-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
146153
; NO-VP-INLOOP-NEXT: No successors
154+
; NO-VP-INLOOP-EMPTY:
155+
; NO-VP-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
147156
; NO-VP-INLOOP-NEXT: }
148157
;
149158
entry:

llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
232232
; CHECK-EMPTY:
233233
; CHECK-NEXT: scalar.ph
234234
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
235+
; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234>
235236
; CHECK-NEXT: No successors
236237
; CHECK-EMPTY:
237238
; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
239+
; CHECK-NEXT: Live-out i32 %and.red = vp<[[RESUME_RED]]>
238240
; CHECK-NEXT: }
239241
;
240242
entry:

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,10 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
165165
; CHECK-NEXT: No successors
166166
; CHECK-EMPTY:
167167
; CHECK-NEXT: scalar.ph
168+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
168169
; CHECK-NEXT: No successors
170+
; CHECK-EMPTY:
171+
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
169172
; CHECK-NEXT: }
170173
;
171174
entry:
@@ -221,7 +224,10 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
221224
; CHECK-NEXT: No successors
222225
; CHECK-EMPTY:
223226
; CHECK-NEXT: scalar.ph
227+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
224228
; CHECK-NEXT: No successors
229+
; CHECK-EMPTY:
230+
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
225231
; CHECK-NEXT: }
226232
;
227233
entry:
@@ -447,7 +453,10 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
447453
; CHECK-NEXT: No successors
448454
; CHECK-EMPTY:
449455
; CHECK-NEXT: scalar.ph
456+
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
450457
; CHECK-NEXT: No successors
458+
; CHECK-EMPTY:
459+
; CHECK-NEXT: Live-out float %sum.07 = vp<[[RED_RESUME]]>
451460
; CHECK-NEXT:}
452461

453462
entry:

0 commit comments

Comments
 (0)