Skip to content

Commit 7ed9803

Browse files
committed
[VPlan] Add VPValue for VF, use it for VPWidenIntOrFpInductionRecipe.
Similar to VFxUF, also add a VF VPValue to VPlan and use it to get the runtime VF in VPWidenIntOrFpInductionRecipe. Code for VF is only generated if there are users of VF, to avoid unnecessary test changes. Note: some tests still need updating; this will be done once we converge on a final version of the patch.
1 parent 7e2b5e2 commit 7ed9803

23 files changed

+87
-132
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8249,10 +8249,12 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
82498249
VPValue *Step =
82508250
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep(), SE);
82518251
if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
8252-
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, TruncI);
8252+
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, Plan.getVF(),
8253+
IndDesc, TruncI);
82538254
}
82548255
assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
8255-
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc);
8256+
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, Plan.getVF(),
8257+
IndDesc);
82568258
}
82578259

82588260
VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
@@ -8667,6 +8669,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
86678669
VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
86688670
Header->insert(CanonicalIVPHI, Header->begin());
86698671

8672+
VPBuilder PhBuilder(cast<VPBasicBlock>(TopRegion->getSinglePredecessor()));
86708673
VPBuilder Builder(TopRegion->getExitingBasicBlock());
86718674
// Add a VPInstruction to increment the scalar canonical IV by VF * UF.
86728675
auto *CanonicalIVIncrement = Builder.createOverflowingOp(

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -928,8 +928,21 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
928928

929929
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
930930
// FIXME: Model VF * UF computation completely in VPlan.
931-
VFxUF.setUnderlyingValue(
932-
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF));
931+
Value *RuntimeVF = nullptr;
932+
if (VF.getNumUsers()) {
933+
RuntimeVF = createStepForVF(Builder, TripCountV->getType(), State.VF, 1);
934+
VF.setUnderlyingValue(RuntimeVF);
935+
}
936+
if (RuntimeVF) {
937+
VFxUF.setUnderlyingValue(
938+
State.UF > 1 ? Builder.CreateMul(
939+
VF.getLiveInIRValue(),
940+
ConstantInt::get(TripCountV->getType(), State.UF))
941+
: VF.getLiveInIRValue());
942+
} else {
943+
VFxUF.setUnderlyingValue(
944+
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF));
945+
}
933946

934947
// When vectorizing the epilogue loop, the canonical induction start value
935948
// needs to be changed from zero to the value after the main vector loop.
@@ -1235,6 +1248,7 @@ VPlan *VPlan::duplicate() {
12351248
}
12361249
Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
12371250
Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1251+
Old2NewVPValues[&VF] = &NewPlan->VF;
12381252
if (BackedgeTakenCount) {
12391253
NewPlan->BackedgeTakenCount = new VPValue();
12401254
Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1770,25 +1770,27 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
17701770

17711771
public:
17721772
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1773-
const InductionDescriptor &IndDesc)
1773+
VPValue *VF, const InductionDescriptor &IndDesc)
17741774
: VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
17751775
Trunc(nullptr), IndDesc(IndDesc) {
17761776
addOperand(Step);
1777+
addOperand(VF);
17771778
}
17781779

17791780
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
1780-
const InductionDescriptor &IndDesc,
1781+
VPValue *VF, const InductionDescriptor &IndDesc,
17811782
TruncInst *Trunc)
17821783
: VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
17831784
IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
17841785
addOperand(Step);
1786+
addOperand(VF);
17851787
}
17861788

17871789
~VPWidenIntOrFpInductionRecipe() override = default;
17881790

17891791
VPWidenIntOrFpInductionRecipe *clone() override {
1790-
return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(),
1791-
getStepValue(), IndDesc, Trunc);
1792+
return new VPWidenIntOrFpInductionRecipe(
1793+
IV, getStartValue(), getStepValue(), getOperand(2), IndDesc, Trunc);
17921794
}
17931795

17941796
VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
@@ -3282,6 +3284,8 @@ class VPlan {
32823284
/// Represents the vector trip count.
32833285
VPValue VectorTripCount;
32843286

3287+
VPValue VF;
3288+
32853289
/// Represents the loop-invariant VF * UF of the vector loop region.
32863290
VPValue VFxUF;
32873291

@@ -3380,6 +3384,8 @@ class VPlan {
33803384
/// Returns VF * UF of the vector loop region.
33813385
VPValue &getVFxUF() { return VFxUF; }
33823386

3387+
VPValue *getVF() { return &VF; };
3388+
33833389
void addVF(ElementCount VF) { VFs.insert(VF); }
33843390

33853391
void setVF(ElementCount VF) {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1308,11 +1308,11 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
13081308
// Multiply the vectorization factor by the step using integer or
13091309
// floating-point arithmetic as appropriate.
13101310
Type *StepType = Step->getType();
1311-
Value *RuntimeVF;
1311+
Value *RuntimeVF = State.get(getOperand(2), {0, 0});
13121312
if (Step->getType()->isFloatingPointTy())
13131313
RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
13141314
else
1315-
RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
1315+
RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
13161316
Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
13171317

13181318
// Create a vector splat to use in the induction update.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
5858
VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
5959
VPValue *Step =
6060
vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
61-
NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II);
61+
NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step,
62+
Plan->getVF(), *II);
6263
} else {
6364
assert(isa<VPInstruction>(&Ingredient) &&
6465
"only VPInstructions expected here");

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
2121
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer
2222
; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
2323
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP9]]
24-
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
25-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
26-
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP11]]
24+
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
2725
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
2826
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
2927
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[VAL]], i64 0
@@ -112,9 +110,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
112110
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer
113111
; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
114112
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP9]]
115-
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
116-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
117-
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP11]]
113+
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
118114
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
119115
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
120116
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[VAL]], i64 0

llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,7 @@ define void @foo() {
2424
; CHECK-NEXT: [[TMP5:%.*]] = add <vscale x 4 x i64> [[TMP4]], zeroinitializer
2525
; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
2626
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]]
27-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
28-
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
29-
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]
27+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP19]]
3028
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
3129
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
3230
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,7 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
3232
; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 2 x i32> [[TMP8]], zeroinitializer
3333
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP9]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
3434
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
35-
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
36-
; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], 2
37-
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP12]]
35+
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]
3836
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP13]], i64 0
3937
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
4038
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -293,10 +293,9 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
293293
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8
294294
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]]
295295
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
296+
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP3]], 2
296297
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3
297298
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
298-
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
299-
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
300299
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0
301300
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
302301
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ define void @induction_i7(ptr %dst) #0 {
1818
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
1919
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7
2020
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
21-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
21+
; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2
22+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
2223
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i8> @llvm.experimental.stepvector.nxv2i8()
2324
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i8> [[TMP6]] to <vscale x 2 x i7>
2425
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 2 x i7> [[TMP7]], zeroinitializer
@@ -92,7 +93,8 @@ define void @induction_i3_zext(ptr %dst) #0 {
9293
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
9394
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3
9495
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
95-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
96+
; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2
97+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2
9698
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i8> @llvm.experimental.stepvector.nxv2i8()
9799
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i8> [[TMP6]] to <vscale x 2 x i3>
98100
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 2 x i3> [[TMP7]], zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,7 @@ define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly
2626
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
2727
; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2
2828
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
29-
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
30-
; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 2
31-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
29+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP5]], i64 0
3230
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
3331
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3432
; CHECK: vector.body:

0 commit comments

Comments
 (0)