Skip to content

Commit 05e1b53

Browse files
authored
[VPlan] Model FOR resume value extraction in VPlan. (#93396)
This patch uses the ExtractFromEnd VPInstruction opcode to extract the value of a FOR to be used as resume value for the phi in the scalar loop. It adds a new live-out that temporarily wraps the FOR phi in the scalar loop. fixFixedOrderRecurrence will process live-outs for fixed-order recurrence phis by creating a new phi node in the scalar preheader, using the generated value for the live-out as incoming value from the middle block and the original start value as incoming value for the other edge. Creation of the phi in the preheader, as well as updating the phi in the scalar loop, will also be moved to VPlan in the future, eventually retiring fixFixedOrderRecurrence. Depends on #93395. PR: #93396
1 parent e635520 commit 05e1b53

19 files changed

+206
-152
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 33 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
#include "VPlan.h"
6060
#include "VPlanAnalysis.h"
6161
#include "VPlanHCFGBuilder.h"
62+
#include "VPlanPatternMatch.h"
6263
#include "VPlanTransforms.h"
6364
#include "VPlanVerifier.h"
6465
#include "llvm/ADT/APInt.h"
@@ -606,10 +607,9 @@ class InnerLoopVectorizer {
606607
BasicBlock *MiddleBlock, BasicBlock *VectorHeader,
607608
VPlan &Plan, VPTransformState &State);
608609

609-
/// Create the exit value of first order recurrences in the middle block and
610-
/// update their users.
611-
void fixFixedOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR,
612-
VPTransformState &State);
610+
/// Create the phi node for the resume value of first order recurrences in the
611+
/// scalar preheader and update the users in the scalar loop.
612+
void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State);
613613

614614
/// Iteratively sink the scalarized operands of a predicated instruction into
615615
/// the block that was created for it.
@@ -3391,16 +3391,16 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
33913391
fixNonInductionPHIs(Plan, State);
33923392

33933393
// At this point every instruction in the original loop is widened to a
3394-
// vector form. Now we need to fix the recurrences in the loop. These PHI
3395-
// nodes are currently empty because we did not want to introduce cycles.
3396-
// This is the second stage of vectorizing recurrences. Note that fixing
3397-
// reduction phis are already modeled in VPlan.
3398-
// TODO: Also model fixing fixed-order recurrence phis in VPlan.
3399-
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
3400-
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
3401-
for (VPRecipeBase &R : HeaderVPBB->phis()) {
3402-
if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
3403-
fixFixedOrderRecurrence(FOR, State);
3394+
// vector form. Note that fixing reduction phis, as well as extracting the
3395+
// exit and resume values for fixed-order recurrences are already modeled in
3396+
// VPlan. All that remains to do here is to create a phi in the scalar
3397+
// pre-header for each fixed-order recurrence resume value.
3398+
// TODO: Also model creating phis in the scalar pre-header in VPlan.
3399+
for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) {
3400+
if (!Legal->isFixedOrderRecurrence(LO->getPhi()))
3401+
continue;
3402+
fixFixedOrderRecurrence(LO, State);
3403+
Plan.removeLiveOut(LO->getPhi());
34043404
}
34053405

34063406
// Forget the original basic block.
@@ -3416,6 +3416,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
34163416
for (PHINode &PN : Exit->phis())
34173417
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
34183418

3419+
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
34193420
VPBasicBlock *LatchVPBB = VectorRegion->getExitingBasicBlock();
34203421
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
34213422
if (Cost->requiresScalarEpilogue(VF.isVector())) {
@@ -3469,85 +3470,31 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
34693470
VF.getKnownMinValue() * UF);
34703471
}
34713472

3472-
void InnerLoopVectorizer::fixFixedOrderRecurrence(
3473-
VPFirstOrderRecurrencePHIRecipe *PhiR, VPTransformState &State) {
3474-
// This is the second phase of vectorizing first-order recurrences. An
3475-
// overview of the transformation is described below. Suppose we have the
3476-
// following loop.
3477-
//
3478-
// for (int i = 0; i < n; ++i)
3479-
// b[i] = a[i] - a[i - 1];
3480-
//
3481-
// There is a first-order recurrence on "a". For this loop, the shorthand
3482-
// scalar IR looks like:
3483-
//
3484-
// scalar.ph:
3485-
// s_init = a[-1]
3486-
// br scalar.body
3487-
//
3488-
// scalar.body:
3489-
// i = phi [0, scalar.ph], [i+1, scalar.body]
3490-
// s1 = phi [s_init, scalar.ph], [s2, scalar.body]
3491-
// s2 = a[i]
3492-
// b[i] = s2 - s1
3493-
// br cond, scalar.body, ...
3494-
//
3495-
// In this example, s1 is a recurrence because it's value depends on the
3496-
// previous iteration. In the first phase of vectorization, we created a
3497-
// vector phi v1 for s1. We now complete the vectorization and produce the
3498-
// shorthand vector IR shown below (for VF = 4, UF = 1).
3499-
//
3500-
// vector.ph:
3501-
// v_init = vector(..., ..., ..., a[-1])
3502-
// br vector.body
3503-
//
3504-
// vector.body
3505-
// i = phi [0, vector.ph], [i+4, vector.body]
3506-
// v1 = phi [v_init, vector.ph], [v2, vector.body]
3507-
// v2 = a[i, i+1, i+2, i+3];
3508-
// v3 = vector(v1(3), v2(0, 1, 2))
3509-
// b[i, i+1, i+2, i+3] = v2 - v3
3510-
// br cond, vector.body, middle.block
3511-
//
3512-
// middle.block:
3513-
// x = v2(3)
3514-
// br scalar.ph
3515-
//
3516-
// scalar.ph:
3517-
// s_init = phi [x, middle.block], [a[-1], otherwise]
3518-
// br scalar.body
3519-
//
3520-
// After execution completes the vector loop, we extract the next value of
3521-
// the recurrence (x) to use as the initial value in the scalar loop.
3522-
3473+
void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO,
3474+
VPTransformState &State) {
35233475
// Extract the last vector element in the middle block. This will be the
35243476
// initial value for the recurrence when jumping to the scalar loop.
3525-
VPValue *PreviousDef = PhiR->getBackedgeValue();
3526-
Value *Incoming = State.get(PreviousDef, UF - 1);
3527-
auto *ExtractForScalar = Incoming;
3528-
auto *IdxTy = Builder.getInt32Ty();
3529-
Value *RuntimeVF = nullptr;
3530-
if (VF.isVector()) {
3531-
auto *One = ConstantInt::get(IdxTy, 1);
3532-
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
3533-
RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
3534-
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
3535-
ExtractForScalar =
3536-
Builder.CreateExtractElement(Incoming, LastIdx, "vector.recur.extract");
3537-
}
3477+
VPValue *VPExtract = LO->getOperand(0);
3478+
using namespace llvm::VPlanPatternMatch;
3479+
assert(match(VPExtract, m_VPInstruction<VPInstruction::ExtractFromEnd>(
3480+
m_VPValue(), m_VPValue())) &&
3481+
"FOR LiveOut expects to use an extract from end.");
3482+
Value *ResumeScalarFOR = State.get(VPExtract, UF - 1, true);
35383483

35393484
// Fix the initial value of the original recurrence in the scalar loop.
3485+
PHINode *ScalarHeaderPhi = LO->getPhi();
3486+
auto *InitScalarFOR =
3487+
ScalarHeaderPhi->getIncomingValueForBlock(LoopScalarPreHeader);
35403488
Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin());
3541-
PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingValue());
3542-
auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init");
3543-
auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue();
3489+
auto *ScalarPreheaderPhi =
3490+
Builder.CreatePHI(ScalarHeaderPhi->getType(), 2, "scalar.recur.init");
35443491
for (auto *BB : predecessors(LoopScalarPreHeader)) {
3545-
auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit;
3546-
Start->addIncoming(Incoming, BB);
3492+
auto *Incoming = BB == LoopMiddleBlock ? ResumeScalarFOR : InitScalarFOR;
3493+
ScalarPreheaderPhi->addIncoming(Incoming, BB);
35473494
}
3548-
3549-
Phi->setIncomingValueForBlock(LoopScalarPreHeader, Start);
3550-
Phi->setName("scalar.recur");
3495+
ScalarHeaderPhi->setIncomingValueForBlock(LoopScalarPreHeader,
3496+
ScalarPreheaderPhi);
3497+
ScalarHeaderPhi->setName("scalar.recur");
35513498
}
35523499

35533500
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3166,7 +3166,9 @@ class VPlan {
31663166
/// definitions are VPValues that hold a pointer to their underlying IR.
31673167
SmallVector<VPValue *, 16> VPLiveInsToFree;
31683168

3169-
/// Values used outside the plan.
3169+
/// Values used outside the plan. It contains live-outs that need fixing. Any
3170+
/// live-out that is fixed outside VPlan needs to be removed. The remaining
3171+
/// live-outs are fixed via VPLiveOut::fixPhi.
31703172
MapVector<PHINode *, VPLiveOut *> LiveOuts;
31713173

31723174
/// Mapping from SCEVs to the VPValues representing their expansions.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -847,14 +847,91 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
847847
// all users.
848848
RecurSplice->setOperand(0, FOR);
849849

850+
// This is the second phase of vectorizing first-order recurrences. An
851+
// overview of the transformation is described below. Suppose we have the
852+
// following loop with some use after the loop of the last a[i-1],
853+
//
854+
// for (int i = 0; i < n; ++i) {
855+
// t = a[i - 1];
856+
// b[i] = a[i] - t;
857+
// }
858+
// use t;
859+
//
860+
// There is a first-order recurrence on "a". For this loop, the shorthand
861+
// scalar IR looks like:
862+
//
863+
// scalar.ph:
864+
// s_init = a[-1]
865+
// br scalar.body
866+
//
867+
// scalar.body:
868+
// i = phi [0, scalar.ph], [i+1, scalar.body]
869+
// s1 = phi [s_init, scalar.ph], [s2, scalar.body]
870+
// s2 = a[i]
871+
// b[i] = s2 - s1
872+
// br cond, scalar.body, exit.block
873+
//
874+
// exit.block:
875+
// use = lcssa.phi [s1, scalar.body]
876+
//
877+
// In this example, s1 is a recurrence because its value depends on the
878+
// previous iteration. In the first phase of vectorization, we created a
879+
// vector phi v1 for s1. We now complete the vectorization and produce the
880+
// shorthand vector IR shown below (for VF = 4, UF = 1).
881+
//
882+
// vector.ph:
883+
// v_init = vector(..., ..., ..., a[-1])
884+
// br vector.body
885+
//
886+
// vector.body
887+
// i = phi [0, vector.ph], [i+4, vector.body]
888+
// v1 = phi [v_init, vector.ph], [v2, vector.body]
889+
// v2 = a[i, i+1, i+2, i+3];
890+
// v3 = vector(v1(3), v2(0, 1, 2))
891+
// b[i, i+1, i+2, i+3] = v2 - v3
892+
// br cond, vector.body, middle.block
893+
//
894+
// middle.block:
895+
// s_penultimate = v2(2) = v3(3)
896+
// s_resume = v2(3)
897+
// br cond, scalar.ph, exit.block
898+
//
899+
// scalar.ph:
900+
// s_init' = phi [s_resume, middle.block], [s_init, otherwise]
901+
// br scalar.body
902+
//
903+
// scalar.body:
904+
// i = phi [0, scalar.ph], [i+1, scalar.body]
905+
// s1 = phi [s_init', scalar.ph], [s2, scalar.body]
906+
// s2 = a[i]
907+
// b[i] = s2 - s1
908+
// br cond, scalar.body, exit.block
909+
//
910+
// exit.block:
911+
// lo = lcssa.phi [s1, scalar.body], [s_penultimate, middle.block]
912+
//
913+
// After execution completes the vector loop, we extract the next value of
914+
// the recurrence (s_resume) to use as the initial value in the scalar loop. This
915+
// is modeled by ExtractFromEnd.
850916
Type *IntTy = Plan.getCanonicalIV()->getScalarType();
851-
auto *Result = cast<VPInstruction>(MiddleBuilder.createNaryOp(
917+
918+
// Extract the penultimate value of the recurrence and update VPLiveOut
919+
// users of the recurrence splice.
920+
auto *Penultimate = cast<VPInstruction>(MiddleBuilder.createNaryOp(
852921
VPInstruction::ExtractFromEnd,
853922
{FOR->getBackedgeValue(),
854923
Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 2))},
855924
{}, "vector.recur.extract.for.phi"));
856925
RecurSplice->replaceUsesWithIf(
857-
Result, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
926+
Penultimate, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
927+
928+
// Extract the resume value and create a new VPLiveOut for it.
929+
auto *Resume = MiddleBuilder.createNaryOp(
930+
VPInstruction::ExtractFromEnd,
931+
{FOR->getBackedgeValue(),
932+
Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 1))},
933+
{}, "vector.recur.extract");
934+
Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), Resume);
858935
}
859936
return true;
860937
}

llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
4747
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4848
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4949
; CHECK: middle.block:
50-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
5150
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD1]], i32 15
51+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
5252
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
5353
; CHECK: scalar.ph:
5454
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
@@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
154154
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
155155
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
156156
; CHECK: middle.block:
157-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
158157
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD5]], i32 15
159158
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
160159
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <16 x i8> [[TMP10]], i32 15
160+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
161161
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
162162
; CHECK: scalar.ph:
163163
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
127127
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
128128
; CHECK: middle.block:
129129
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
130-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
131130
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
131+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
132132
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
133133
; CHECK: scalar.ph:
134134
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -786,8 +786,8 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur
786786
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
787787
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
788788
; CHECK: middle.block:
789-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
790789
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15
790+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
791791
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
792792
; CHECK: scalar.ph:
793793
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
@@ -871,8 +871,8 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture
871871
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
872872
; CHECK: middle.block:
873873
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14
874-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
875874
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15
875+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
876876
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
877877
; CHECK: scalar.ph:
878878
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
100100
; DEFAULT: middle.block:
101101
; DEFAULT-NEXT: [[BIN_RDX:%.*]] = or <vscale x 4 x i32> [[TMP58]], [[TMP57]]
102102
; DEFAULT-NEXT: [[TMP60:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[BIN_RDX]])
103-
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
104103
; DEFAULT-NEXT: [[TMP61:%.*]] = call i32 @llvm.vscale.i32()
105104
; DEFAULT-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], 4
106105
; DEFAULT-NEXT: [[TMP63:%.*]] = sub i32 [[TMP62]], 1
@@ -109,6 +108,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
109108
; DEFAULT-NEXT: [[TMP65:%.*]] = mul i32 [[TMP64]], 4
110109
; DEFAULT-NEXT: [[TMP66:%.*]] = sub i32 [[TMP65]], 1
111110
; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT13:%.*]] = extractelement <vscale x 4 x i32> [[TMP20]], i32 [[TMP66]]
111+
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
112112
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
113113
; DEFAULT: scalar.ph:
114114
; DEFAULT-NEXT: [[SCALAR_RECUR_INIT14:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT13]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1509,11 +1509,11 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
15091509
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
15101510
; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
15111511
; CHECK: middle.block:
1512-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
15131512
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
15141513
; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 2
15151514
; CHECK-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP31]], -1
15161515
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> [[WIDE_MASKED_GATHER4]], i32 [[TMP32]]
1516+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
15171517
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
15181518
; CHECK: scalar.ph:
15191519
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
4747
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4848
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4949
; CHECK: middle.block:
50-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
5150
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD1]], i32 15
51+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
5252
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
5353
; CHECK: scalar.ph:
5454
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
@@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
154154
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
155155
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
156156
; CHECK: middle.block:
157-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
158157
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD5]], i32 15
159158
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
160159
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <16 x i8> [[TMP10]], i32 15
160+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
161161
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
162162
; CHECK: scalar.ph:
163163
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ]

llvm/test/Transforms/LoopVectorize/X86/pr72969.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ define void @test(ptr %p) {
8383
; VEC-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
8484
; VEC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
8585
; VEC: middle.block:
86-
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
8786
; VEC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP28]], i32 3
87+
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
8888
; VEC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
8989
; VEC: scalar.ph:
9090
; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]

0 commit comments

Comments
 (0)