Skip to content

Commit 2bca580

Browse files
committed
Insert StepVectors just before unrollByUF
1 parent c2d8fb8 commit 2bca580

17 files changed

+159
-813
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7783,6 +7783,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77837783
"Trying to execute plan with unsupported VF");
77847784
assert(BestVPlan.hasUF(BestUF) &&
77857785
"Trying to execute plan with unsupported UF");
7786+
VPlanTransforms::materializeStepVectors(BestVPlan);
77867787
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
77877788
// cost model is complete for better cost estimates.
77887789
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
@@ -8467,16 +8468,14 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
84678468

84688469
VPValue *Step =
84698470
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep(), SE);
8470-
VPValue *StepVector =
8471-
Plan.getOrAddLiveIn(PoisonValue::get(PhiOrTrunc->getType()));
84728471
if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
84738472
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
8474-
StepVector, IndDesc, TruncI,
8473+
IndDesc, TruncI,
84758474
TruncI->getDebugLoc());
84768475
}
84778476
assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
8478-
return new VPWidenIntOrFpInductionRecipe(
8479-
Phi, Start, Step, &Plan.getVF(), StepVector, IndDesc, Phi->getDebugLoc());
8477+
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
8478+
IndDesc, Phi->getDebugLoc());
84808479
}
84818480

84828481
VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,30 +1883,21 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
18831883

18841884
public:
18851885
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1886-
VPValue *VF, VPValue *StepVector,
1887-
const InductionDescriptor &IndDesc, DebugLoc DL)
1886+
VPValue *VF, const InductionDescriptor &IndDesc,
1887+
DebugLoc DL)
18881888
: VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
18891889
Step, IndDesc, DL),
18901890
Trunc(nullptr) {
18911891
addOperand(VF);
1892-
// Temporarily use Poison for step-vector, which will only be introduced
1893-
// when needed, when preparing to execute.
1894-
assert(isa<PoisonValue>(StepVector->getLiveInIRValue()));
1895-
addOperand(StepVector);
18961892
}
18971893

18981894
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1899-
VPValue *VF, VPValue *StepVector,
1900-
const InductionDescriptor &IndDesc,
1895+
VPValue *VF, const InductionDescriptor &IndDesc,
19011896
TruncInst *Trunc, DebugLoc DL)
19021897
: VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
19031898
Step, IndDesc, DL),
19041899
Trunc(Trunc) {
19051900
addOperand(VF);
1906-
// Temporarily use Poison for step-vector, which will only be introduced
1907-
// when needed, when preparing to execute.
1908-
assert(isa<PoisonValue>(StepVector->getLiveInIRValue()));
1909-
addOperand(StepVector);
19101901
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
19111902
(void)Metadata;
19121903
if (Trunc)
@@ -1919,7 +1910,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
19191910
VPWidenIntOrFpInductionRecipe *clone() override {
19201911
return new VPWidenIntOrFpInductionRecipe(
19211912
getPHINode(), getStartValue(), getStepValue(), getVFValue(),
1922-
getStepVector(), getInductionDescriptor(), Trunc, getDebugLoc());
1913+
getInductionDescriptor(), Trunc, getDebugLoc());
19231914
}
19241915

19251916
VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
@@ -1937,10 +1928,13 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
19371928
VPValue *getVFValue() { return getOperand(2); }
19381929
const VPValue *getVFValue() const { return getOperand(2); }
19391930

1931+
// TODO: Remove once VPWidenIntOrFpInductionRecipe is fully expanded in
1932+
// convertToConcreteRecipes.
19401933
VPValue *getStepVector() { return getOperand(3); }
19411934
const VPValue *getStepVector() const { return getOperand(3); }
19421935
void setStepVector(VPValue *V) {
1943-
assert(isa<PoisonValue>(getOperand(3)->getLiveInIRValue()) &&
1936+
assert(cast<VPInstructionWithType>(getStepVector()->getDefiningRecipe())
1937+
->getOpcode() == VPInstruction::StepVector &&
19441938
cast<VPInstructionWithType>(V->getDefiningRecipe())->getOpcode() ==
19451939
VPInstruction::StepVector);
19461940
setOperand(3, V);

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,8 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
7070
VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
7171
VPValue *Step =
7272
vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
73-
VPValue *StepVector =
74-
Plan->getOrAddLiveIn(PoisonValue::get(Phi->getType()));
7573
NewRecipe = new VPWidenIntOrFpInductionRecipe(
76-
Phi, Start, Step, &Plan->getVF(), StepVector, *II,
77-
Ingredient.getDebugLoc());
74+
Phi, Start, Step, &Plan->getVF(), *II, Ingredient.getDebugLoc());
7875
} else {
7976
assert(isa<VPInstruction>(&Ingredient) &&
8077
"only VPInstructions expected here");
@@ -1226,6 +1223,15 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
12261223
WideIV->setStartValue(NewStart);
12271224
auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
12281225
WideIV->setStepValue(NewStep);
1226+
// TODO: Remove once VPWidenIntOrFpInductionRecipe is fully expanded.
1227+
auto *OldStepVector = cast<VPInstructionWithType>(
1228+
WideIV->getStepVector()->getDefiningRecipe());
1229+
assert(OldStepVector->getOpcode() == VPInstruction::StepVector);
1230+
auto *NewStepVector = new VPInstructionWithType(
1231+
VPInstruction::StepVector, {}, NewIVTy, OldStepVector->getDebugLoc());
1232+
NewStepVector->insertAfter(WideIV->getStepVector()->getDefiningRecipe());
1233+
WideIV->setStepVector(NewStepVector);
1234+
OldStepVector->eraseFromParent();
12291235

12301236
auto *NewBTC = new VPWidenCastRecipe(
12311237
Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount(), NewIVTy);
@@ -2445,23 +2451,6 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
24452451
continue;
24462452
}
24472453

2448-
if (auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
2449-
// Infer an up-to-date type since
2450-
// optimizeVectorInductionWidthForTCAndVFUF may have truncated the start
2451-
// and step values.
2452-
Type *Ty = TypeInfo.inferScalarType(IVR->getStartValue());
2453-
if (TruncInst *Trunc = IVR->getTruncInst())
2454-
Ty = Trunc->getType();
2455-
if (Ty->isFloatingPointTy())
2456-
Ty = IntegerType::get(Ty->getContext(), Ty->getScalarSizeInBits());
2457-
2458-
VPBuilder Builder(Plan.getVectorPreheader());
2459-
VPInstruction *StepVector = Builder.createNaryOp(
2460-
VPInstruction::StepVector, {}, Ty, {}, R.getDebugLoc());
2461-
IVR->setStepVector(StepVector);
2462-
continue;
2463-
}
2464-
24652454
VPValue *VectorStep;
24662455
VPValue *ScalarStep;
24672456
if (!match(&R, m_VPInstruction<VPInstruction::WideIVStep>(
@@ -2606,6 +2595,29 @@ void VPlanTransforms::handleUncountableEarlyExit(
26062595
LatchExitingBranch->eraseFromParent();
26072596
}
26082597

2598+
void VPlanTransforms::materializeStepVectors(VPlan &Plan) {
2599+
for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
2600+
auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
2601+
if (!IVR)
2602+
continue;
2603+
2604+
// Use the type of the original phi, or of the truncate if there is one;
2605+
// optimizeVectorInductionWidthForTCAndVFUF replaces the step-vector when it
2606+
// narrows the induction. FP inductions use a same-width integer type.
2607+
Type *Ty = IVR->getPHINode()->getType();
2608+
if (TruncInst *Trunc = IVR->getTruncInst())
2609+
Ty = Trunc->getType();
2610+
if (Ty->isFloatingPointTy())
2611+
Ty = IntegerType::get(Ty->getContext(), Ty->getScalarSizeInBits());
2612+
2613+
VPBuilder Builder(Plan.getVectorPreheader());
2614+
VPInstruction *StepVector = Builder.createNaryOp(
2615+
VPInstruction::StepVector, {}, Ty, {}, IVR->getDebugLoc());
2616+
assert(IVR->getNumOperands() == 3);
2617+
IVR->addOperand(StepVector);
2618+
}
2619+
}
2620+
26092621
void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
26102622
if (Plan.hasScalarVFOnly())
26112623
return;

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,11 @@ struct VPlanTransforms {
199199
optimizeInductionExitUsers(VPlan &Plan,
200200
DenseMap<VPValue *, VPValue *> &EndValues);
201201

202+
/// Materialize VPInstruction::StepVectors for VPWidenIntOrFpInductionRecipes.
203+
/// TODO: Remove once all of VPWidenIntOrFpInductionRecipe is expanded in
204+
/// convertToConcreteRecipes.
205+
static void materializeStepVectors(VPlan &Plan);
206+
202207
/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
203208
static void materializeBroadcasts(VPlan &Plan);
204209

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1616
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1717
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
1818
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
19+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
1920
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
2021
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
21-
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
2222
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
2323
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
2424
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
@@ -100,9 +100,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
100100
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
101101
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
102102
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
103+
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
103104
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
104105
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
105-
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
106106
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
107107
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
108108
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
123123
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
124124
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
125125
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
126+
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
126127
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
127128
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
128-
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
129129
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
130130
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
131131
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
@@ -246,9 +246,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
246246
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
247247
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
248248
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
249+
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
249250
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
250251
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
251-
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
252252
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
253253
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
254254
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]

llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -517,13 +517,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
517517
; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
518518
; DEFAULT-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
519519
; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
520+
; DEFAULT-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
520521
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
521522
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
522523
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
523524
; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
524525
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
525526
; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
526-
; DEFAULT-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
527527
; DEFAULT-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
528528
; DEFAULT-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
529529
; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
@@ -593,13 +593,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
593593
; OPTSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
594594
; OPTSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
595595
; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
596+
; OPTSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
596597
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
597598
; OPTSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
598599
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
599600
; OPTSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
600601
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
601602
; OPTSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
602-
; OPTSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
603603
; OPTSIZE-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
604604
; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
605605
; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
@@ -669,13 +669,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
669669
; MINSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
670670
; MINSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
671671
; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
672+
; MINSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
672673
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
673674
; MINSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
674675
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
675676
; MINSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
676677
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
677678
; MINSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
678-
; MINSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
679679
; MINSIZE-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
680680
; MINSIZE-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
681681
; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8

0 commit comments

Comments
 (0)