Skip to content

Commit 2c513e7

Browse files
committed
[LV] Optimise users of induction variables in early exit blocks
This is the second of two PRs that attempt to improve the IR generated in the exit blocks of vectorised loops with uncountable early exits. It follows on from PR #128880. In this PR I am improving the generated code for users of induction variables in early exit blocks. This required adding a new VPInstruction called FirstActiveLane, which calculates the index of the first active predicate in the mask operand. I have added a new function optimizeEarlyExitInductionUser that is called from optimizeInductionExitUsers when handling users in early exit blocks.
1 parent 235ceac commit 2c513e7

File tree

6 files changed

+149
-160
lines changed

6 files changed

+149
-160
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
882882
// Extracts the first active lane of a vector, where the first operand is
883883
// the predicate, and the second operand is the vector to extract.
884884
ExtractFirstActive,
885+
// Calculates the first active lane index of the vector predicate operand.
886+
FirstActiveLane,
885887
};
886888

887889
private:

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
8585
return VecTy->getElementType();
8686
return BaseTy;
8787
}
88+
case VPInstruction::FirstActiveLane:
89+
return IntegerType::get(Ctx, 64);
8890
case VPInstruction::LogicalAnd:
8991
assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
9092
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
712712
Builder.getInt64Ty(), Mask, true, "first.active.lane");
713713
return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");
714714
}
715+
case VPInstruction::FirstActiveLane: {
716+
Value *Mask = State.get(getOperand(0));
717+
return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask,
718+
true, "first.active.lane");
719+
}
715720
default:
716721
llvm_unreachable("Unsupported opcode for instruction");
717722
}
@@ -754,6 +759,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
754759
bool VPInstruction::isVectorToScalar() const {
755760
return getOpcode() == VPInstruction::ExtractFromEnd ||
756761
getOpcode() == VPInstruction::ExtractFirstActive ||
762+
getOpcode() == VPInstruction::FirstActiveLane ||
757763
getOpcode() == VPInstruction::ComputeReductionResult ||
758764
getOpcode() == VPInstruction::AnyOf;
759765
}
@@ -946,6 +952,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
946952
case VPInstruction::ExtractFirstActive:
947953
O << "extract-first-active";
948954
break;
955+
case VPInstruction::FirstActiveLane:
956+
O << "first-active-lane";
957+
break;
949958
default:
950959
O << Instruction::getOpcodeName(getOpcode());
951960
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,64 @@ static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) {
737737
return IsWideIVInc() ? WideIV : nullptr;
738738
}
739739

740+
/// Attempts to optimize the induction variable exit values for users in the
741+
/// early exit block.
742+
static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
743+
VPTypeAnalysis &TypeInfo,
744+
VPBlockBase *PredVPBB,
745+
VPValue *Op) {
746+
using namespace VPlanPatternMatch;
747+
748+
VPValue *Incoming, *Mask;
749+
if (!match(Op, m_VPInstruction<VPInstruction::ExtractFirstActive>(
750+
m_VPValue(Incoming), m_VPValue(Mask))))
751+
return nullptr;
752+
753+
auto *WideIV = getOptimizableIVOf(Incoming);
754+
if (!WideIV)
755+
return nullptr;
756+
757+
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
758+
if (WideIntOrFp && WideIntOrFp->getTruncInst())
759+
return nullptr;
760+
761+
// Calculate the final index.
762+
VPValue *EndValue = Plan.getCanonicalIV();
763+
auto CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
764+
VPBuilder B(cast<VPBasicBlock>(PredVPBB));
765+
766+
DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc();
767+
VPValue *FirstActiveLane =
768+
B.createNaryOp(VPInstruction::FirstActiveLane, Mask, DL);
769+
if (CanonicalIVType != TypeInfo.inferScalarType(FirstActiveLane)) {
770+
Instruction::CastOps CastOp = CanonicalIVType->getScalarSizeInBits() < 64
771+
? Instruction::Trunc
772+
: Instruction::ZExt;
773+
FirstActiveLane =
774+
B.createScalarCast(CastOp, FirstActiveLane, CanonicalIVType, DL);
775+
}
776+
EndValue = B.createNaryOp(Instruction::Add, {EndValue, FirstActiveLane}, DL);
777+
778+
// `getOptimizableIVOf()` always returns the pre-incremented IV, so if it
779+
// changed it means the exit is using the incremented value, so we need to
780+
// add the step.
781+
if (Incoming != WideIV) {
782+
VPValue *One = Plan.getOrAddLiveIn(ConstantInt::get(CanonicalIVType, 1));
783+
EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
784+
}
785+
786+
if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
787+
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
788+
VPValue *Start = WideIV->getStartValue();
789+
VPValue *Step = WideIV->getStepValue();
790+
EndValue = B.createDerivedIV(
791+
ID.getKind(), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
792+
Start, EndValue, Step);
793+
}
794+
795+
return EndValue;
796+
}
797+
740798
/// Attempts to optimize the induction variable exit values for users in the
741799
/// exit block coming from the latch in the original scalar loop.
742800
static VPValue *
@@ -799,12 +857,15 @@ void VPlanTransforms::optimizeInductionExitUsers(
799857
break;
800858

801859
for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
860+
VPValue *Escape = nullptr;
802861
if (PredVPBB == MiddleVPBB)
803-
if (VPValue *Escape = optimizeLatchExitInductionUser(
804-
Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx),
805-
EndValues))
806-
ExitIRI->setOperand(Idx, Escape);
807-
// TODO: Optimize early exit induction users in follow-on patch.
862+
Escape = optimizeLatchExitInductionUser(
863+
Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), EndValues);
864+
else
865+
Escape = optimizeEarlyExitInductionUser(Plan, TypeInfo, PredVPBB,
866+
ExitIRI->getOperand(Idx));
867+
if (Escape)
868+
ExitIRI->setOperand(Idx, Escape);
808869
}
809870
}
810871
}

llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll

Lines changed: 10 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,9 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
2525
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
2626
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
2727
; CHECK-NEXT: [[TMP6:%.*]] = add i64 3, [[N_VEC]]
28-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
29-
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 16 x i64> [[TMP7]], splat (i64 1)
30-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i64> splat (i64 3), [[TMP8]]
31-
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
32-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[TMP9]], i64 0
33-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i64> [[DOTSPLATINSERT]], <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
3428
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3529
; CHECK: vector.body:
3630
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
37-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3831
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
3932
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 0
4033
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP10]]
@@ -48,7 +41,6 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
4841
; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 16 x i1> [[TMP15]], splat (i1 true)
4942
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP16]])
5043
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
51-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[DOTSPLAT]]
5244
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
5345
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5446
; CHECK: middle.split:
@@ -58,7 +50,8 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
5850
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]]
5951
; CHECK: vector.early.exit:
6052
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP16]], i1 true)
61-
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 16 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
53+
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
54+
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP20]]
6255
; CHECK-NEXT: br label [[LOOP_END]]
6356
; CHECK: scalar.ph:
6457
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -140,7 +133,8 @@ define i64 @same_exit_block_pre_inc_use4() {
140133
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
141134
; CHECK: vector.early.exit:
142135
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 true)
143-
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <2 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
136+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
137+
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]]
144138
; CHECK-NEXT: br label [[LOOP_END]]
145139
; CHECK: scalar.ph:
146140
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -197,7 +191,6 @@ define i64 @loop_contains_safe_call() #1 {
197191
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
198192
; CHECK: vector.body:
199193
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
200-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
201194
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
202195
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
203196
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[TMP0]]
@@ -209,7 +202,6 @@ define i64 @loop_contains_safe_call() #1 {
209202
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
210203
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
211204
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
212-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
213205
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
214206
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
215207
; CHECK: middle.split:
@@ -218,7 +210,8 @@ define i64 @loop_contains_safe_call() #1 {
218210
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
219211
; CHECK: vector.early.exit:
220212
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
221-
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
213+
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
214+
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]]
222215
; CHECK-NEXT: br label [[LOOP_END]]
223216
; CHECK: scalar.ph:
224217
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -283,16 +276,9 @@ define i64 @loop_contains_safe_div() #1 {
283276
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
284277
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
285278
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
286-
; CHECK-NEXT: [[TMP16:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
287-
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 4 x i64> [[TMP16]], splat (i64 1)
288-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> splat (i64 3), [[TMP17]]
289-
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
290-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
291-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
292279
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
293280
; CHECK: vector.body:
294281
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
295-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
296282
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 3, [[INDEX2]]
297283
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX1]], 0
298284
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP0]]
@@ -304,7 +290,6 @@ define i64 @loop_contains_safe_div() #1 {
304290
; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[TMP14]], splat (i1 true)
305291
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]])
306292
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]]
307-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
308293
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
309294
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
310295
; CHECK: middle.split:
@@ -314,7 +299,8 @@ define i64 @loop_contains_safe_div() #1 {
314299
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]]
315300
; CHECK: vector.early.exit:
316301
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP15]], i1 true)
317-
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
302+
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX2]], [[FIRST_ACTIVE_LANE]]
303+
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP16]]
318304
; CHECK-NEXT: br label [[LOOP_END]]
319305
; CHECK: scalar.ph:
320306
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[OFFSET_IDX]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -375,7 +361,6 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
375361
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
376362
; CHECK: vector.body:
377363
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
378-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
379364
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
380365
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
381366
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP0]]
@@ -389,7 +374,6 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
389374
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
390375
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
391376
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
392-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
393377
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
394378
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
395379
; CHECK: middle.split:
@@ -399,7 +383,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
399383
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
400384
; CHECK: vector.early.exit:
401385
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
402-
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
386+
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
387+
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]]
403388
; CHECK-NEXT: br label [[LOOP_END]]
404389
; CHECK: scalar.ph:
405390
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]

0 commit comments

Comments
 (0)