Skip to content

Commit c482b8f

Browse files
committed
[VPlan] Only execute VPExpandSCEVRecipes once and remove them (NFC).
Instead of executing the whole entry VPIRBB twice, first execute only the VPExpandSCEVRecipes and replace their uses with the expanded values, each of which becomes a live-in VPValue. This removes the special logic in VPExpandSCEVRecipe that supported executing twice, and it lets the ExpandedSCEVs map move out of VPTransformState. It will also allow adding other recipes to the entry VPBB in the future.
1 parent 2fe7585 commit c482b8f

File tree

7 files changed

+27
-37
lines changed

7 files changed

+27
-37
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7695,8 +7695,21 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76957695

76967696
// 0. Generate SCEV-dependent code in the entry, including TripCount, before
76977697
// making any changes to the CFG.
7698-
if (!BestVPlan.getEntry()->empty())
7699-
BestVPlan.getEntry()->execute(&State);
7698+
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
7699+
auto *Entry = cast<VPIRBasicBlock>(BestVPlan.getEntry());
7700+
State.Builder.SetInsertPoint(Entry->getIRBasicBlock()->getTerminator());
7701+
for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
7702+
auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
7703+
if (!ExpSCEV)
7704+
continue;
7705+
ExpSCEV->execute(State);
7706+
ExpandedSCEVs[ExpSCEV->getSCEV()] = State.get(ExpSCEV, VPLane(0));
7707+
VPValue *Exp = BestVPlan.getOrAddLiveIn(ExpandedSCEVs[ExpSCEV->getSCEV()]);
7708+
ExpSCEV->replaceAllUsesWith(Exp);
7709+
if (BestVPlan.getTripCount() == ExpSCEV)
7710+
BestVPlan.resetTripCount(Exp);
7711+
ExpSCEV->eraseFromParent();
7712+
}
77007713

77017714
if (!ILV.getTripCount())
77027715
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
@@ -7706,9 +7719,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77067719

77077720
// 1. Set up the skeleton for vectorization, including vector pre-header and
77087721
// middle block. The vector loop is created during VPlan execution.
7709-
VPBasicBlock *VectorPH =
7710-
cast<VPBasicBlock>(BestVPlan.getEntry()->getSingleSuccessor());
7711-
7722+
VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSingleSuccessor());
77127723
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
77137724
if (VectorizingEpilogue)
77147725
VPlanTransforms::removeDeadRecipes(BestVPlan);
@@ -7821,7 +7832,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78217832
}
78227833
}
78237834

7824-
return State.ExpandedSCEVs;
7835+
return ExpandedSCEVs;
78257836
}
78267837

78277838
//===--------------------------------------------------------------------===//

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -355,10 +355,6 @@ struct VPTransformState {
355355
/// memchecks. The actual versioning is performed manually.
356356
LoopVersioning *LVer = nullptr;
357357

358-
/// Map SCEVs to their expanded values. Populated when executing
359-
/// VPExpandSCEVRecipes.
360-
DenseMap<const SCEV *, Value *> ExpandedSCEVs;
361-
362358
/// VPlan-based type analysis.
363359
VPTypeAnalysis TypeAnalysis;
364360

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3444,23 +3444,10 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
34443444

34453445
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
34463446
assert(!State.Lane && "cannot be used in per-lane");
3447-
if (State.ExpandedSCEVs.contains(Expr)) {
3448-
// SCEV Expr has already been expanded, result must already be set. At the
3449-
// moment we have to execute the entry block twice (once before skeleton
3450-
// creation to get expanded SCEVs used by the skeleton and once during
3451-
// regular VPlan execution).
3452-
State.Builder.SetInsertPoint(State.CFG.VPBB2IRBB[getParent()]);
3453-
assert(State.get(this, VPLane(0)) == State.ExpandedSCEVs[Expr] &&
3454-
"Results must match");
3455-
return;
3456-
}
3457-
34583447
const DataLayout &DL = SE.getDataLayout();
34593448
SCEVExpander Exp(SE, DL, "induction", /*PreserveLCSSA=*/true);
3460-
34613449
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
34623450
&*State.Builder.GetInsertPoint());
3463-
State.ExpandedSCEVs[Expr] = Res;
34643451
State.set(this, Res, VPLane(0));
34653452
}
34663453

llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
152152
; CHECK-NEXT: Live-in ir<[[VF:%.+]]> = VF
153153
; CHECK-NEXT: Live-in ir<[[VFxUF:%.+]]>.1 = VF * UF
154154
; CHECK-NEXT: Live-in ir<[[VEC_TC:%.+]]> = vector-trip-count
155-
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
155+
; CHECK-NEXT: ir<%0> = original trip-count
156156
; CHECK-EMPTY:
157157
; CHECK-NEXT: ir-bb<for.body.preheader>:
158158
; CHECK-NEXT: IR %0 = zext i32 %n to i64
159-
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64)
160159
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.scevcheck>
161160
; CHECK-EMPTY:
162161
; CHECK-NEXT: ir-bb<vector.scevcheck>:
@@ -213,7 +212,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
213212
; CHECK-NEXT: Successor(s): middle.block
214213
; CHECK-EMPTY:
215214
; CHECK-NEXT: middle.block:
216-
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VEC_TC]]>
215+
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]>
217216
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
218217
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, ir-bb<scalar.ph>
219218
; CHECK-EMPTY:
@@ -402,11 +401,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
402401
; CHECK-NEXT: Live-in ir<[[VF:%.+]]> = VF
403402
; CHECK-NEXT: Live-in ir<[[VFxUF:%.+]]>.1 = VF * UF
404403
; CHECK-NEXT: Live-in ir<[[VEC_TC:%.+]]> = vector-trip-count
405-
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
404+
; CHECK-NEXT: ir<%0> = original trip-count
406405
; CHECK-EMPTY:
407406
; CHECK-NEXT: ir-bb<for.body.preheader>:
408407
; CHECK-NEXT: IR %0 = zext i32 %n to i64
409-
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 %n to i64)
410408
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.scevcheck>
411409
; CHECK-EMPTY:
412410
; CHECK-NEXT: ir-bb<vector.scevcheck>:
@@ -463,7 +461,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
463461
; CHECK-NEXT: Successor(s): middle.block
464462
; CHECK-EMPTY:
465463
; CHECK-NEXT: middle.block:
466-
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VEC_TC]]>
464+
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%0>, ir<[[VEC_TC]]>
467465
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
468466
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, ir-bb<scalar.ph>
469467
; CHECK-EMPTY:

llvm/test/Transforms/LoopVectorize/pr45259.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ define i8 @widget(ptr %arr, i8 %t9) {
1616
; CHECK-NEXT: [[T1_0_LCSSA:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
1717
; CHECK-NEXT: [[T1_0_LCSSA4:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
1818
; CHECK-NEXT: [[T1_0_LCSSA1:%.*]] = phi ptr [ [[T1_0]], [[BB6]] ]
19+
; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64
20+
; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA4]] to i64
1921
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[ARR1]] to i32
2022
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[TMP0]]
21-
; CHECK-NEXT: [[T1_0_LCSSA3:%.*]] = ptrtoint ptr [[T1_0_LCSSA]] to i64
2223
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[T1_0_LCSSA3]] to i32
2324
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], [[TMP2]]
24-
; CHECK-NEXT: [[T1_0_LCSSA2:%.*]] = ptrtoint ptr [[T1_0_LCSSA4]] to i64
2525
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4
2626
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
2727
; CHECK: vector.scevcheck:

llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
55
; CHECK: VPlan 'Final VPlan for VF={2},UF={1}' {
66
; CHECK-NEXT: Live-in ir<[[VFxUF:.+]]> = VF * UF
77
; CHECK-NEXT: Live-in ir<[[VTC:%.+]]> = vector-trip-count
8-
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
8+
; CHECK-NEXT: ir<%0> = original trip-count
99
; CHECK-EMPTY:
1010
; CHECK-NEXT: ir-bb<entry>:
11-
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64))
1211
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
1312
; CHECK-EMPTY:
1413
; CHECK-NEXT: ir-bb<vector.ph>:
@@ -86,7 +85,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
8685
; CHECK-NEXT: Successor(s): middle.block
8786
; CHECK-EMPTY:
8887
; CHECK-NEXT: middle.block:
89-
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VTC]]>
88+
; CHECK-NEXT: EMIT vp<[[MIDDLE_CMP:%.+]]> = icmp eq ir<%0>, ir<[[VTC]]>
9089
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_CMP]]>
9190
; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
9291
; CHECK-EMPTY:

llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,10 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
6262
; CHECK: Executing best plan with VF=8, UF=2
6363
; CHECK-NEXT: VPlan 'Final VPlan for VF={8},UF={2}' {
6464
; CHECK-NEXT: Live-in ir<[[VTC:%.+]]> = vector-trip-count
65-
; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
65+
; CHECK-NEXT: ir<%and> = original trip-count
6666
; CHECK-EMPTY:
6767
; CHECK-NEXT: ir-bb<entry>:
6868
; CHECK-NEXT: IR %and = and i64 %N, 15
69-
; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i4 (trunc i64 %N to i4) to i64)
7069
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
7170
; CHECK-EMPTY:
7271
; CHECK-NEXT: ir-bb<vector.ph>:
@@ -92,7 +91,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
9291
; CHECK-NEXT: Successor(s): middle.block
9392
; CHECK-EMPTY:
9493
; CHECK-NEXT: middle.block:
95-
; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VTC]]>
94+
; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq ir<%and>, ir<[[VTC]]>
9695
; CHECK-NEXT: EMIT branch-on-cond vp<[[C]]>
9796
; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
9897
; CHECK-EMPTY:

0 commit comments

Comments
 (0)