Skip to content

Commit 25ec71c

Browse files
committed
[VPlan] Compute induction end values in VPlan.
Use createDerivedIV to compute IV end values directly in VPlan, instead of creating them up-front. This allows updating IV users outside the loop as a follow-up. Depends on llvm#110004 and llvm#109975.
1 parent a868a1e commit 25ec71c

33 files changed

+349
-249
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,7 @@ class VPBuilder {
233233

234234
VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
235235
FPMathOperator *FPBinOp, VPValue *Start,
236-
VPCanonicalIVPHIRecipe *CanonicalIV,
237-
VPValue *Step) {
236+
VPValue *CanonicalIV, VPValue *Step) {
238237
return tryInsertInstruction(
239238
new VPDerivedIVRecipe(Kind, FPBinOp, Start, CanonicalIV, Step));
240239
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 130 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2575,22 +2575,15 @@ void InnerLoopVectorizer::createInductionResumeValue(
25752575
assert(VectorTripCount && "Expected valid arguments");
25762576

25772577
Instruction *OldInduction = Legal->getPrimaryInduction();
2578-
Value *EndValue = nullptr;
25792578
Value *EndValueFromAdditionalBypass = AdditionalBypass.second;
25802579
if (OrigPhi == OldInduction) {
2581-
// We know what the end value is.
2582-
EndValue = VectorTripCount;
25832580
} else {
25842581
IRBuilder<> B(LoopVectorPreHeader->getTerminator());
25852582

25862583
// Fast-math-flags propagate from the original induction instruction.
25872584
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
25882585
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
25892586

2590-
EndValue = emitTransformedIndex(B, VectorTripCount, II.getStartValue(),
2591-
Step, II.getKind(), II.getInductionBinOp());
2592-
EndValue->setName("ind.end");
2593-
25942587
// Compute the end value for the additional bypass (if applicable).
25952588
if (AdditionalBypass.first) {
25962589
B.SetInsertPoint(AdditionalBypass.first,
@@ -2602,26 +2595,6 @@ void InnerLoopVectorizer::createInductionResumeValue(
26022595
}
26032596
}
26042597

2605-
VPBasicBlock *MiddleVPBB =
2606-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
2607-
2608-
VPBasicBlock *ScalarPHVPBB = nullptr;
2609-
if (MiddleVPBB->getNumSuccessors() == 2) {
2610-
// Order is strict: first is the exit block, second is the scalar preheader.
2611-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
2612-
} else {
2613-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
2614-
}
2615-
2616-
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
2617-
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
2618-
VPInstruction::ResumePhi,
2619-
{Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())},
2620-
OrigPhi->getDebugLoc(), "bc.resume.val");
2621-
2622-
auto *ScalarLoopHeader =
2623-
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
2624-
addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
26252598
InductionBypassValues[OrigPhi] = {AdditionalBypass.first,
26262599
EndValueFromAdditionalBypass};
26272600
}
@@ -7660,10 +7633,22 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76607633
ILV.getOrCreateVectorTripCount(nullptr),
76617634
CanonicalIVStartValue, State);
76627635

7636+
VPBasicBlock *MiddleVPBB =
7637+
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7638+
7639+
VPBasicBlock *ScalarPHVPBB = nullptr;
7640+
if (MiddleVPBB->getNumSuccessors() == 2) {
7641+
// Order is strict: first is the exit block, second is the scalar
7642+
// preheader.
7643+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
7644+
} else {
7645+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
7646+
}
7647+
76637648
BestVPlan.execute(&State);
76647649

76657650
// 2.5 Collect reduction resume values.
7666-
auto *ExitVPBB =
7651+
VPBasicBlock *ExitVPBB =
76677652
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
76687653
for (VPRecipeBase &R : *ExitVPBB) {
76697654
createAndCollectMergePhiForReduction(
@@ -7948,6 +7933,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79487933
// Generate a resume induction for the vector epilogue and put it in the
79497934
// vector epilogue preheader
79507935
Type *IdxTy = Legal->getWidestInductionType();
7936+
79517937
PHINode *EPResumeVal = PHINode::Create(IdxTy, 2, "vec.epilog.resume.val");
79527938
EPResumeVal->insertBefore(LoopVectorPreHeader->getFirstNonPHIIt());
79537939
EPResumeVal->addIncoming(EPI.VectorTripCount, VecEpilogueIterationCountCheck);
@@ -8835,6 +8821,74 @@ addUsersInExitBlock(VPlan &Plan,
88358821
}
88368822
}
88378823

8824+
/// Create resume values in \p Plan for the widened inductions of the vector
/// loop header.
///
/// For every VPWidenIntOrFpInductionRecipe (without a truncate instruction)
/// and every VPWidenPointerInductionRecipe among the header phis, the end value
/// of the induction is computed in the block preceding the vector loop region.
/// A ResumePhi VPInstruction named "bc.resume.val" with operands
/// {end value, start value} is then created in the scalar preheader and added
/// as an operand to the original phi in the scalar loop header.
static void addResumeValuesForInductions(VPlan &Plan) {
  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
  auto *LoopRegion = Plan.getVectorLoopRegion();
  VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();

  // End-value recipes are inserted into the single predecessor of the vector
  // loop region.
  VPBuilder Builder(cast<VPBasicBlock>(LoopRegion->getSinglePredecessor()));

  for (VPRecipeBase &PhiR : Header->phis()) {
    PHINode *OrigPhi = nullptr;
    const InductionDescriptor *ID = nullptr;
    VPValue *Start = nullptr;
    VPValue *Step = nullptr;
    Type *ScalarTy = nullptr;
    bool IsCanonical = false;

    if (auto *IntOrFpIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&PhiR)) {
      // Inductions that carry a truncate instruction get no resume value here.
      if (IntOrFpIV->getTruncInst())
        continue;
      IsCanonical = IntOrFpIV->isCanonical();
      ScalarTy = IntOrFpIV->getScalarType();
      Step = IntOrFpIV->getStepValue();
      Start = IntOrFpIV->getStartValue();
      ID = &IntOrFpIV->getInductionDescriptor();
      OrigPhi = cast<PHINode>(IntOrFpIV->getUnderlyingValue());
    } else if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&PhiR)) {
      Start = PtrIV->getStartValue();
      // The step is the second operand of the pointer induction recipe.
      Step = PtrIV->getOperand(1);
      // The scalar type is taken from the live-in IR value of the start.
      ScalarTy = Start->getLiveInIRValue()->getType();
      ID = &PtrIV->getInductionDescriptor();
      OrigPhi = cast<PHINode>(PtrIV->getUnderlyingValue());
    } else {
      // Not a widened induction; nothing to resume.
      continue;
    }

    // A canonical induction ends at the vector trip count itself; any other
    // induction derives its end value from start, step and vector trip count.
    VPValue *VectorTC = &Plan.getVectorTripCount();
    VPValue *EndValue = VectorTC;
    if (!IsCanonical)
      EndValue = Builder.createDerivedIV(
          ID->getKind(),
          dyn_cast_or_null<FPMathOperator>(ID->getInductionBinOp()), Start,
          VectorTC, Step);

    // Bring the end value to the induction's scalar type if they differ.
    // NOTE(review): a Trunc is always used, which presumes the end value is
    // the wider of the two types - confirm.
    if (TypeInfo.inferScalarType(EndValue) != ScalarTy)
      EndValue =
          Builder.createScalarCast(Instruction::Trunc, EndValue, ScalarTy);

    auto *MiddleVPBB = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());

    // Order is strict: with two successors, the first is the exit block and
    // the second is the scalar preheader.
    VPBasicBlock *ScalarPHVPBB =
        MiddleVPBB->getNumSuccessors() == 2
            ? cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1])
            : cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());

    VPBuilder ScalarPHBuilder(ScalarPHVPBB);
    auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
        VPInstruction::ResumePhi, {EndValue, Start}, OrigPhi->getDebugLoc(),
        "bc.resume.val");

    // Hook the resume value up to the original phi in the scalar loop header.
    addOperandToPhiInVPIRBasicBlock(
        cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor()), OrigPhi,
        ResumePhiRecipe);
  }
}
8891+
88388892
/// Handle live-outs for first order reductions, both in the scalar preheader
88398893
/// and the original exit block:
88408894
/// 1. Feed a resume value for every FOR from the vector loop to the scalar
@@ -9145,6 +9199,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91459199
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
91469200
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
91479201
addUsersInExitBlock(*Plan, ExitUsersToFix);
9202+
addResumeValuesForInductions(*Plan);
91489203

91499204
// ---------------------------------------------------------------------------
91509205
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9250,6 +9305,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
92509305
bool HasNUW = true;
92519306
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
92529307
DebugLoc());
9308+
addResumeValuesForInductions(*Plan);
92539309
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
92549310
return Plan;
92559311
}
@@ -9533,7 +9589,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
95339589
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
95349590
Kind, cast_if_present<BinaryOperator>(FPBinOp));
95359591
DerivedIV->setName("offset.idx");
9536-
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
9592+
assert((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) &&
9593+
"IV didn't need transforming?");
95379594

95389595
State.set(this, DerivedIV, VPLane(0));
95399596
}
@@ -10202,6 +10259,50 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1020210259
EPI, &LVL, &CM, BFI, PSI, Checks,
1020310260
*BestMainPlan);
1020410261

10262+
VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
10263+
// Collect the PHI nodes of the wide inductions in the VPlan for the epilogue. Those need their resume values computed from the main vector loop. The resume values of all other inductions can be removed from the main VPlan.
10264+
SmallPtrSet<PHINode *, 2> WidenedPhis;
10265+
for (VPRecipeBase &R :
10266+
BestEpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
10267+
if (!isa<VPWidenIntOrFpInductionRecipe,
10268+
VPWidenPointerInductionRecipe>(&R))
10269+
continue;
10270+
if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10271+
WidenedPhis.insert(
10272+
cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode());
10273+
else
10274+
WidenedPhis.insert(
10275+
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
10276+
}
10277+
VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(
10278+
BestMainPlan->getVectorLoopRegion()->getSingleSuccessor());
10279+
10280+
VPBasicBlock *ScalarPHVPBB = nullptr;
10281+
if (MiddleVPBB->getNumSuccessors() == 2) {
10282+
// Order is strict: first is the exit block, second is the scalar
10283+
// preheader.
10284+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
10285+
} else {
10286+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
10287+
}
10288+
10289+
for (VPRecipeBase &R :
10290+
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
10291+
auto *VPIRInst = cast<VPIRInstruction>(&R);
10292+
auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction());
10293+
if (!IRI)
10294+
break;
10295+
if (WidenedPhis.contains(IRI) ||
10296+
!LVL.getInductionVars().contains(IRI))
10297+
continue;
10298+
VPRecipeBase *ResumePhi =
10299+
VPIRInst->getOperand(0)->getDefiningRecipe();
10300+
VPIRInst->setOperand(0, BestMainPlan->getOrAddLiveIn(
10301+
Constant::getNullValue(IRI->getType())));
10302+
ResumePhi->eraseFromParent();
10303+
}
10304+
VPlanTransforms::removeDeadRecipes(*BestMainPlan);
10305+
1020510306
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
1020610307
*BestMainPlan, MainILV, DT, true);
1020710308
++LoopsVectorized;
@@ -10210,7 +10311,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1021010311
// edges from the first pass.
1021110312
EPI.MainLoopVF = EPI.EpilogueVF;
1021210313
EPI.MainLoopUF = EPI.EpilogueUF;
10213-
VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
1021410314
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
1021510315
ORE, EPI, &LVL, &CM, BFI, PSI,
1021610316
Checks, BestEpiPlan);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
6363
case VPInstruction::FirstOrderRecurrenceSplice:
6464
case VPInstruction::LogicalAnd:
6565
case VPInstruction::PtrAdd:
66+
case VPInstruction::ResumePhi:
6667
return false;
6768
default:
6869
return true;

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1313
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
1414
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
1515
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
16-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1716
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1817
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
18+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1919
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
2020
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
2121
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer
@@ -102,9 +102,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
102102
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
103103
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
104104
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
105-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
106105
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
107106
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
107+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
108108
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
109109
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
110110
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -785,11 +785,11 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
785785
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 257, [[TMP2]]
786786
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
787787
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
788+
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
789+
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
788790
; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 8
789791
; PRED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
790792
; PRED-NEXT: [[IND_END1:%.*]] = mul i64 [[N_VEC]], 2
791-
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
792-
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
793793
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
794794
; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2
795795
; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -522,31 +522,31 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
522522
; PRED: pred.store.continue:
523523
; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
524524
; PRED-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
525-
; PRED: pred.store.if2:
525+
; PRED: pred.store.if3:
526526
; PRED-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP18]], i32 1
527527
; PRED-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP24]]
528528
; PRED-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 1
529529
; PRED-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4
530530
; PRED-NEXT: br label [[PRED_STORE_CONTINUE4]]
531-
; PRED: pred.store.continue3:
531+
; PRED: pred.store.continue4:
532532
; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
533533
; PRED-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
534-
; PRED: pred.store.if4:
534+
; PRED: pred.store.if5:
535535
; PRED-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP18]], i32 2
536536
; PRED-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP28]]
537537
; PRED-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], 2
538538
; PRED-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4
539539
; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]]
540-
; PRED: pred.store.continue5:
540+
; PRED: pred.store.continue6:
541541
; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
542542
; PRED-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
543-
; PRED: pred.store.if6:
543+
; PRED: pred.store.if7:
544544
; PRED-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP18]], i32 3
545545
; PRED-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP32]]
546546
; PRED-NEXT: [[TMP34:%.*]] = add i32 [[OFFSET_IDX]], 3
547547
; PRED-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4
548548
; PRED-NEXT: br label [[PRED_STORE_CONTINUE8]]
549-
; PRED: pred.store.continue7:
549+
; PRED: pred.store.continue8:
550550
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
551551
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]])
552552
; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true>
@@ -719,31 +719,31 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
719719
; PRED: pred.store.continue:
720720
; PRED-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
721721
; PRED-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
722-
; PRED: pred.store.if1:
722+
; PRED: pred.store.if2:
723723
; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1
724724
; PRED-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP23]]
725725
; PRED-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 1
726726
; PRED-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4
727727
; PRED-NEXT: br label [[PRED_STORE_CONTINUE3]]
728-
; PRED: pred.store.continue2:
728+
; PRED: pred.store.continue3:
729729
; PRED-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
730730
; PRED-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
731-
; PRED: pred.store.if3:
731+
; PRED: pred.store.if4:
732732
; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP17]], i32 2
733733
; PRED-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP27]]
734734
; PRED-NEXT: [[TMP29:%.*]] = add i32 [[OFFSET_IDX]], 2
735735
; PRED-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4
736736
; PRED-NEXT: br label [[PRED_STORE_CONTINUE5]]
737-
; PRED: pred.store.continue4:
737+
; PRED: pred.store.continue5:
738738
; PRED-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
739739
; PRED-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
740-
; PRED: pred.store.if5:
740+
; PRED: pred.store.if6:
741741
; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3
742742
; PRED-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP31]]
743743
; PRED-NEXT: [[TMP33:%.*]] = add i32 [[OFFSET_IDX]], 3
744744
; PRED-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4
745745
; PRED-NEXT: br label [[PRED_STORE_CONTINUE7]]
746-
; PRED: pred.store.continue6:
746+
; PRED: pred.store.continue7:
747747
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
748748
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]])
749749
; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true>
@@ -863,8 +863,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
863863
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], 1
864864
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
865865
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
866-
; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
867866
; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX1]], 1
867+
; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
868868
; PRED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
869869
; PRED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer
870870
; PRED-NEXT: br label [[LOOP:%.*]]

0 commit comments

Comments
 (0)