Skip to content

Commit 5192da5

Browse files
committed
[VPlan] Model FOR resume value extraction in VPlan.
This patch introduces a new ExtractRecurrenceResume VPInstruction opcode to extract the value of a FOR to be used as resume value for the phi in the scalar loop. Note that it takes a VPFirstOrderRecurrencePHIRecipe as operand. This is needed at the moment to conveniently let fixFixedOrderRecurrence still handle creating and patching up the phis in the scalar pre-header and header. As ExtractRecurrenceResume won't have any users yet, it is marked as having side-effects until the phi in the scalar pre-header is also created and managed by VPlan. Depends on llvm#93395
1 parent c631131 commit 5192da5

18 files changed

+165
-141
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 18 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -608,8 +608,7 @@ class InnerLoopVectorizer {
608608

609609
/// Create the exit value of first order recurrences in the middle block and
610610
/// update their users.
611-
void fixFixedOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR,
612-
VPTransformState &State);
611+
void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State);
613612

614613
/// Iteratively sink the scalarized operands of a predicated instruction into
615614
/// the block that was created for it.
@@ -3391,16 +3390,16 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
33913390
fixNonInductionPHIs(Plan, State);
33923391

33933392
// At this point every instruction in the original loop is widened to a
3394-
// vector form. Now we need to fix the recurrences in the loop. These PHI
3395-
// nodes are currently empty because we did not want to introduce cycles.
3396-
// This is the second stage of vectorizing recurrences. Note that fixing
3397-
// reduction phis are already modeled in VPlan.
3398-
// TODO: Also model fixing fixed-order recurrence phis in VPlan.
3399-
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
3400-
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
3401-
for (VPRecipeBase &R : HeaderVPBB->phis()) {
3402-
if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
3403-
fixFixedOrderRecurrence(FOR, State);
3393+
// vector form. Note that fixing reduction phis, as well as extracting the
3394+
// exit and resume values for fixed-order recurrences are already modeled in
3395+
// VPlan. All that remains to do here is creating a phi in the scalar
3396+
// pre-header for each fixed-order recurrence resume value.
3397+
// TODO: Also model creating phis in the scalar pre-header in VPlan.
3398+
for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) {
3399+
if (!Legal->isFixedOrderRecurrence(LO->getPhi()))
3400+
continue;
3401+
fixFixedOrderRecurrence(LO, State);
3402+
Plan.removeLiveOut(LO->getPhi());
34043403
}
34053404

34063405
// Forget the original basic block.
@@ -3416,6 +3415,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
34163415
for (PHINode &PN : Exit->phis())
34173416
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
34183417

3418+
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
34193419
VPBasicBlock *LatchVPBB = VectorRegion->getExitingBasicBlock();
34203420
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
34213421
if (Cost->requiresScalarEpilogue(VF.isVector())) {
@@ -3469,78 +3469,18 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
34693469
VF.getKnownMinValue() * UF);
34703470
}
34713471

3472-
void InnerLoopVectorizer::fixFixedOrderRecurrence(
3473-
VPFirstOrderRecurrencePHIRecipe *PhiR, VPTransformState &State) {
3474-
// This is the second phase of vectorizing first-order recurrences. An
3475-
// overview of the transformation is described below. Suppose we have the
3476-
// following loop.
3477-
//
3478-
// for (int i = 0; i < n; ++i)
3479-
// b[i] = a[i] - a[i - 1];
3480-
//
3481-
// There is a first-order recurrence on "a". For this loop, the shorthand
3482-
// scalar IR looks like:
3483-
//
3484-
// scalar.ph:
3485-
// s_init = a[-1]
3486-
// br scalar.body
3487-
//
3488-
// scalar.body:
3489-
// i = phi [0, scalar.ph], [i+1, scalar.body]
3490-
// s1 = phi [s_init, scalar.ph], [s2, scalar.body]
3491-
// s2 = a[i]
3492-
// b[i] = s2 - s1
3493-
// br cond, scalar.body, ...
3494-
//
3495-
// In this example, s1 is a recurrence because it's value depends on the
3496-
// previous iteration. In the first phase of vectorization, we created a
3497-
// vector phi v1 for s1. We now complete the vectorization and produce the
3498-
// shorthand vector IR shown below (for VF = 4, UF = 1).
3499-
//
3500-
// vector.ph:
3501-
// v_init = vector(..., ..., ..., a[-1])
3502-
// br vector.body
3503-
//
3504-
// vector.body
3505-
// i = phi [0, vector.ph], [i+4, vector.body]
3506-
// v1 = phi [v_init, vector.ph], [v2, vector.body]
3507-
// v2 = a[i, i+1, i+2, i+3];
3508-
// v3 = vector(v1(3), v2(0, 1, 2))
3509-
// b[i, i+1, i+2, i+3] = v2 - v3
3510-
// br cond, vector.body, middle.block
3511-
//
3512-
// middle.block:
3513-
// x = v2(3)
3514-
// br scalar.ph
3515-
//
3516-
// scalar.ph:
3517-
// s_init = phi [x, middle.block], [a[-1], otherwise]
3518-
// br scalar.body
3519-
//
3520-
// After execution completes the vector loop, we extract the next value of
3521-
// the recurrence (x) to use as the initial value in the scalar loop.
3522-
3472+
void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO,
3473+
VPTransformState &State) {
35233474
// Extract the last vector element in the middle block. This will be the
35243475
// initial value for the recurrence when jumping to the scalar loop.
3525-
VPValue *PreviousDef = PhiR->getBackedgeValue();
3526-
Value *Incoming = State.get(PreviousDef, UF - 1);
3527-
auto *ExtractForScalar = Incoming;
3528-
auto *IdxTy = Builder.getInt32Ty();
3529-
Value *RuntimeVF = nullptr;
3530-
if (VF.isVector()) {
3531-
auto *One = ConstantInt::get(IdxTy, 1);
3532-
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
3533-
RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
3534-
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3535-
ExtractForScalar =
3536-
Builder.CreateExtractElement(Incoming, LastIdx, "vector.recur.extract");
3537-
}
3476+
Value *ExtractForScalar = State.get(LO->getOperand(0), UF - 1, true);
35383477

35393478
// Fix the initial value of the original recurrence in the scalar loop.
35403479
Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin());
3541-
PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingValue());
3480+
PHINode *Phi = LO->getPhi();
35423481
auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init");
3543-
auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue();
3482+
auto *ScalarInit =
3483+
LO->getPhi()->getIncomingValueForBlock(LoopScalarPreHeader);
35443484
for (auto *BB : predecessors(LoopScalarPreHeader)) {
35453485
auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit;
35463486
Start->addIncoming(Incoming, BB);

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -847,6 +847,55 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
847847
// all users.
848848
RecurSplice->setOperand(0, FOR);
849849

850+
// This is the second phase of vectorizing first-order recurrences. An
851+
// overview of the transformation is described below. Suppose we have the
852+
// following loop.
853+
//
854+
// for (int i = 0; i < n; ++i)
855+
// b[i] = a[i] - a[i - 1];
856+
//
857+
// There is a first-order recurrence on "a". For this loop, the shorthand
858+
// scalar IR looks like:
859+
//
860+
// scalar.ph:
861+
// s_init = a[-1]
862+
// br scalar.body
863+
//
864+
// scalar.body:
865+
// i = phi [0, scalar.ph], [i+1, scalar.body]
866+
// s1 = phi [s_init, scalar.ph], [s2, scalar.body]
867+
// s2 = a[i]
868+
// b[i] = s2 - s1
869+
// br cond, scalar.body, ...
870+
//
871+
// In this example, s1 is a recurrence because its value depends on the
872+
// previous iteration. In the first phase of vectorization, we created a
873+
// vector phi v1 for s1. We now complete the vectorization and produce the
874+
// shorthand vector IR shown below (for VF = 4, UF = 1).
875+
//
876+
// vector.ph:
877+
// v_init = vector(..., ..., ..., a[-1])
878+
// br vector.body
879+
//
880+
// vector.body
881+
// i = phi [0, vector.ph], [i+4, vector.body]
882+
// v1 = phi [v_init, vector.ph], [v2, vector.body]
883+
// v2 = a[i, i+1, i+2, i+3];
884+
// v3 = vector(v1(3), v2(0, 1, 2))
885+
// b[i, i+1, i+2, i+3] = v2 - v3
886+
// br cond, vector.body, middle.block
887+
//
888+
// middle.block:
889+
// x = v2(3)
890+
// br scalar.ph
891+
//
892+
// scalar.ph:
893+
// s_init = phi [x, middle.block], [a[-1], otherwise]
894+
// br scalar.body
895+
//
896+
// After execution completes the vector loop, we extract the next value of
897+
// the recurrence (x) to use as the initial value in the scalar loop. This
898+
// is modeled by an ExtractFromEnd VPInstruction in the middle block.
850899
Type *IntTy = Plan.getCanonicalIV()->getScalarType();
851900
auto *Result = cast<VPInstruction>(MiddleBuilder.createNaryOp(
852901
VPInstruction::ExtractFromEnd,
@@ -855,6 +904,13 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
855904
{}, "vector.recur.extract.for.phi"));
856905
RecurSplice->replaceUsesWithIf(
857906
Result, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
907+
auto *Resume = MiddleBuilder.createNaryOp(
908+
VPInstruction::ExtractFromEnd,
909+
{FOR->getBackedgeValue(),
910+
Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 1))},
911+
{}, "vector.recur.extract");
912+
// Introduce VPUsers modeling the exit values.
913+
Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), Resume);
858914
}
859915
return true;
860916
}

llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
4747
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4848
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4949
; CHECK: middle.block:
50-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
5150
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD1]], i32 15
51+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
5252
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
5353
; CHECK: scalar.ph:
5454
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
@@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
154154
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
155155
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
156156
; CHECK: middle.block:
157-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
158157
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD5]], i32 15
159158
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
160159
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <16 x i8> [[TMP10]], i32 15
160+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
161161
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
162162
; CHECK: scalar.ph:
163163
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
127127
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
128128
; CHECK: middle.block:
129129
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
130-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
131130
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
131+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
132132
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
133133
; CHECK: scalar.ph:
134134
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -786,8 +786,8 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur
786786
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
787787
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
788788
; CHECK: middle.block:
789-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
790789
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15
790+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
791791
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
792792
; CHECK: scalar.ph:
793793
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
@@ -871,8 +871,8 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture
871871
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
872872
; CHECK: middle.block:
873873
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14
874-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
875874
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15
875+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
876876
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
877877
; CHECK: scalar.ph:
878878
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
100100
; DEFAULT: middle.block:
101101
; DEFAULT-NEXT: [[BIN_RDX:%.*]] = or <vscale x 4 x i32> [[TMP58]], [[TMP57]]
102102
; DEFAULT-NEXT: [[TMP60:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[BIN_RDX]])
103-
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
104103
; DEFAULT-NEXT: [[TMP61:%.*]] = call i32 @llvm.vscale.i32()
105104
; DEFAULT-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], 4
106105
; DEFAULT-NEXT: [[TMP63:%.*]] = sub i32 [[TMP62]], 1
@@ -109,6 +108,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
109108
; DEFAULT-NEXT: [[TMP65:%.*]] = mul i32 [[TMP64]], 4
110109
; DEFAULT-NEXT: [[TMP66:%.*]] = sub i32 [[TMP65]], 1
111110
; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT13:%.*]] = extractelement <vscale x 4 x i32> [[TMP20]], i32 [[TMP66]]
111+
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
112112
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
113113
; DEFAULT: scalar.ph:
114114
; DEFAULT-NEXT: [[SCALAR_RECUR_INIT14:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT13]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1509,11 +1509,11 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
15091509
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
15101510
; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
15111511
; CHECK: middle.block:
1512-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
15131512
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
15141513
; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 2
15151514
; CHECK-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP31]], -1
15161515
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> [[WIDE_MASKED_GATHER4]], i32 [[TMP32]]
1516+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
15171517
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
15181518
; CHECK: scalar.ph:
15191519
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
4747
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4848
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4949
; CHECK: middle.block:
50-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
5150
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD1]], i32 15
51+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
5252
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
5353
; CHECK: scalar.ph:
5454
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
@@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
154154
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
155155
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
156156
; CHECK: middle.block:
157-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
158157
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD5]], i32 15
159158
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
160159
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <16 x i8> [[TMP10]], i32 15
160+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
161161
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
162162
; CHECK: scalar.ph:
163163
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/X86/pr72969.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ define void @test(ptr %p) {
8383
; VEC-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
8484
; VEC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
8585
; VEC: middle.block:
86-
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
8786
; VEC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP28]], i32 3
87+
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
8888
; VEC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
8989
; VEC: scalar.ph:
9090
; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,12 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) {
3333
; CHECK-NEXT: Successor(s): middle.block
3434
; CHECK-EMPTY:
3535
; CHECK-NEXT: middle.block:
36+
; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1>
37+
; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]> = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1>
3638
; CHECK-NEXT: No successors
39+
; CHECK-EMPTY:
40+
; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1]]>
41+
; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2]]>
3742
; CHECK-NEXT: }
3843
;
3944
entry:
@@ -89,7 +94,14 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) {
8994
; CHECK-NEXT: Successor(s): middle.block
9095
; CHECK-EMPTY:
9196
; CHECK-NEXT: middle.block:
97+
; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1>
98+
; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]> = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1>
99+
; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]> = extract-from-end vp<[[FOR2_SPLICE]]>, ir<1>
92100
; CHECK-NEXT: No successors
101+
; CHECK-EMPTY:
102+
; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1]]>
103+
; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2]]>
104+
; CHECK-NEXT: Live-out i16 %for.3 = vp<[[RESUME_3]]>
93105
; CHECK-NEXT: }
94106
;
95107
entry:

0 commit comments

Comments
 (0)