Skip to content

Commit ccebf7a

Browse files
committed
[VPlan] Properly handle sinking of replicate regions.
This patch updates the code that sinks recipes required for first-order recurrences to properly handle replicate-regions. At the moment, the code would just move the replicate recipe out of its replicate-region, producing an invalid VPlan. When sinking a recipe in a replicate-region, we have to sink the whole region. To do that, we first need to split the block at the target recipe and move the region in between. This patch also adds a splitAt helper to VPBasicBlock to split a VPBasicBlock at a given iterator. Fixes PR50009. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D100751
1 parent 1f7adf8 commit ccebf7a

File tree

4 files changed

+267
-1
lines changed

4 files changed

+267
-1
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9094,6 +9094,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
90949094
for (auto &Entry : SinkAfter) {
90959095
VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
90969096
VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
9097+
90979098
// If the target is in a replication region, make sure to move Sink to the
90989099
// block after it, not into the replication region itself.
90999100
if (auto *Region =
@@ -9106,7 +9107,37 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
91069107
continue;
91079108
}
91089109
}
9109-
Sink->moveAfter(Target);
9110+
9111+
auto *SinkRegion =
9112+
dyn_cast_or_null<VPRegionBlock>(Sink->getParent()->getParent());
9113+
// Unless the sink source is in a replicate region, sink the recipe
9114+
// directly.
9115+
if (!SinkRegion || !SinkRegion->isReplicator()) {
9116+
Sink->moveAfter(Target);
9117+
continue;
9118+
}
9119+
9120+
// If the sink source is in a replicate region, we need to move the whole
9121+
// replicate region, which should only contain a single recipe in the main
9122+
// block.
9123+
assert(Sink->getParent()->size() == 1 &&
9124+
"parent must be a replicator with a single recipe");
9125+
auto *SplitBlock =
9126+
Target->getParent()->splitAt(std::next(Target->getIterator()));
9127+
9128+
auto *Pred = SinkRegion->getSinglePredecessor();
9129+
auto *Succ = SinkRegion->getSingleSuccessor();
9130+
VPBlockUtils::disconnectBlocks(Pred, SinkRegion);
9131+
VPBlockUtils::disconnectBlocks(SinkRegion, Succ);
9132+
VPBlockUtils::connectBlocks(Pred, Succ);
9133+
9134+
auto *SplitPred = SplitBlock->getSinglePredecessor();
9135+
9136+
VPBlockUtils::disconnectBlocks(SplitPred, SplitBlock);
9137+
VPBlockUtils::connectBlocks(SplitPred, SinkRegion);
9138+
VPBlockUtils::connectBlocks(SinkRegion, SplitBlock);
9139+
if (VPBB == SplitPred)
9140+
VPBB = SplitBlock;
91109141
}
91119142

91129143
// Interleave memory: for each Interleave Group we marked earlier as relevant

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,32 @@ void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
400400
}
401401
}
402402

403+
/// Split this block at \p SplitAt: create a new block named
/// "<this block's name>.split", insert it after this block, hand this block's
/// former successors over to it, and move every recipe in the range
/// [\p SplitAt, end()) into it.
///
/// \p SplitAt must be an iterator into this block. It may be end(), in which
/// case no recipes are moved and the new block stays empty.
///
/// \returns the newly created block.
VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
  // end() does not designate a recipe, so it must not be dereferenced to
  // query its parent. Callers that split right after the last recipe of a
  // block (e.g. via std::next(LastRecipe->getIterator())) pass end() here.
  assert((SplitAt == end() || SplitAt->getParent() == this) &&
         "can only split at a position in the same block");

  // Snapshot the successors first: the list returned by getSuccessors() is
  // modified by the disconnectBlocks calls below.
  SmallVector<VPBlockBase *, 2> Succs(getSuccessors().begin(),
                                      getSuccessors().end());
  // First, disconnect the current block from its successors.
  for (VPBlockBase *Succ : Succs)
    VPBlockUtils::disconnectBlocks(this, Succ);

  // Create new empty block after the block to split.
  auto *SplitBlock = new VPBasicBlock(getName() + ".split");
  VPBlockUtils::insertBlockAfter(SplitBlock, this);

  // Add successors for block to split to new block.
  for (VPBlockBase *Succ : Succs)
    VPBlockUtils::connectBlocks(SplitBlock, Succ);

  // Finally, move the recipes starting at SplitAt to new block. The
  // early-increment range advances the iterator before each recipe is
  // unlinked from this block, so moving it does not disturb the traversal.
  for (VPRecipeBase &ToMove :
       make_early_inc_range(make_range(SplitAt, this->end())))
    ToMove.moveBefore(*SplitBlock, SplitBlock->end());

  return SplitBlock;
}
428+
403429
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
404430
void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
405431
VPSlotTracker &SlotTracker) const {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1531,6 +1531,11 @@ class VPBasicBlock : public VPBlockBase {
15311531

15321532
void dropAllReferences(VPValue *NewValue) override;
15331533

1534+
/// Split current block at \p SplitAt by inserting a new block between the
1535+
/// current block and its successors and moving all recipes starting at
1536+
/// SplitAt to the new block. Returns the new block.
1537+
VPBasicBlock *splitAt(iterator SplitAt);
1538+
15341539
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
15351540
/// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
15361541
/// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
; REQUIRES: asserts
2+
; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -debug-only=loop-vectorize 2>&1 | FileCheck %s
3+
4+
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
5+
6+
; Test cases for PR50009, which require sinking a replicate-region due to a
7+
; first-order recurrence.
8+
9+
define void @sink_replicate_region_1(i32 %x, i8* %ptr) optsize {
10+
; CHECK-LABEL: sink_replicate_region_1
11+
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
12+
; CHECK-NEXT: loop:
13+
; CHECK-NEXT: WIDEN-PHI %0 = phi 0, %conv
14+
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
15+
; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
16+
; CHECK-NEXT: Successor(s): loop.0
17+
18+
; CHECK: loop.0:
19+
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
20+
; CHECK-NEXT: Successor(s): pred.load
21+
22+
; CHECK: <xVFxUF> pred.load: {
23+
; CHECK-NEXT: pred.load.entry:
24+
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
25+
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
26+
; CHECK-NEXT: CondBit: vp<%3> (loop)
27+
28+
; CHECK: pred.load.if:
29+
; CHECK-NEXT: REPLICATE ir<%lv> = load ir<%gep> (S->V)
30+
; CHECK-NEXT: Successor(s): pred.load.continue
31+
32+
; CHECK: pred.load.continue:
33+
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%lv>
34+
; CHECK-NEXT: No successors
35+
; CHECK-NEXT: }
36+
37+
; CHECK: loop.1:
38+
; CHECK-NEXT: WIDEN ir<%conv> = sext vp<%6>
39+
; CHECK-NEXT: Successor(s): pred.srem
40+
41+
; CHECK: <xVFxUF> pred.srem: {
42+
; CHECK-NEXT: pred.srem.entry:
43+
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
44+
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
45+
; CHECK-NEXT: CondBit: vp<%3> (loop)
46+
47+
; CHECK: pred.srem.if:
48+
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%0>, ir<%x> (S->V)
49+
; CHECK-NEXT: Successor(s): pred.srem.continue
50+
51+
; CHECK: pred.srem.continue:
52+
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%rem>
53+
; CHECK-NEXT: No successors
54+
; CHECK-NEXT: }
55+
56+
; CHECK: loop.1.split:
57+
; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<%9>
58+
; CHECK-NEXT: No successors
59+
; CHECK-NEXT: }
60+
;
61+
entry:
62+
br label %loop
63+
64+
loop:
65+
%0 = phi i32 [ 0, %entry ], [ %conv, %loop ]
66+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
67+
%rem = srem i32 %0, %x
68+
%gep = getelementptr i8, i8* %ptr, i32 %iv
69+
%lv = load i8, i8* %gep
70+
%conv = sext i8 %lv to i32
71+
%add = add i32 %conv, %rem
72+
%iv.next = add nsw i32 %iv, 1
73+
%ec = icmp eq i32 %iv.next, 20001
74+
br i1 %ec, label %exit, label %loop
75+
76+
exit:
77+
ret void
78+
}
79+
80+
define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
81+
; CHECK-LABEL: sink_replicate_region_2
82+
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
83+
; CHECK-NEXT: loop:
84+
; CHECK-NEXT: WIDEN-PHI %recur = phi 0, %recur.next
85+
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
86+
; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
87+
; CHECK-NEXT: Successor(s): loop.0
88+
89+
; CHECK: loop.0:
90+
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
91+
; CHECK-NEXT: Successor(s): pred.srem
92+
93+
; CHECK: <xVFxUF> pred.srem: {
94+
; CHECK-NEXT: pred.srem.entry:
95+
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
96+
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
97+
; CHECK-NEXT: CondBit: vp<%3> (loop)
98+
99+
; CHECK: pred.srem.if:
100+
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x>
101+
; CHECK-NEXT: Successor(s): pred.srem.continue
102+
103+
; CHECK: pred.srem.continue:
104+
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem>
105+
; CHECK-NEXT: No successors
106+
; CHECK-NEXT: }
107+
108+
; CHECK: loop.0.split:
109+
; CHECK-NEXT: REPLICATE ir<%add> = add vp<%6>, ir<%recur.next>
110+
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
111+
; CHECK-NEXT: Successor(s): pred.store
112+
113+
; CHECK: <xVFxUF> pred.store: {
114+
; CHECK-NEXT: pred.store.entry:
115+
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
116+
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
117+
; CHECK-NEXT: CondBit: vp<%3> (loop)
118+
119+
; CHECK: pred.store.if:
120+
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
121+
; CHECK-NEXT: Successor(s): pred.store.continue
122+
123+
; CHECK: pred.store.continue:
124+
; CHECK-NEXT: No successors
125+
; CHECK-NEXT: }
126+
127+
; CHECK: loop.1:
128+
; CHECK-NEXT: No successors
129+
; CHECK-NEXT: }
130+
;
131+
entry:
132+
br label %loop
133+
134+
loop:
135+
%recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ]
136+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
137+
%rem = srem i32 %recur, %x
138+
%recur.next = sext i8 %y to i32
139+
%add = add i32 %rem, %recur.next
140+
%gep = getelementptr i32, i32* %ptr, i32 %iv
141+
store i32 %add, i32* %gep
142+
%iv.next = add nsw i32 %iv, 1
143+
%ec = icmp eq i32 %iv.next, 20001
144+
br i1 %ec, label %exit, label %loop
145+
146+
exit:
147+
ret void
148+
}
149+
150+
define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, i32* %ptr) optsize {
151+
; CHECK-LABEL: sink_replicate_region_3_reduction
152+
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
153+
; CHECK-NEXT: loop:
154+
; CHECK-NEXT: WIDEN-PHI %recur = phi 0, %recur.next
155+
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next
156+
; CHECK-NEXT: WIDEN-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next>
157+
; CHECK-NEXT: EMIT vp<%4> = icmp ule ir<%iv> vp<%0>
158+
; CHECK-NEXT: Successor(s): loop.0
159+
160+
; CHECK: loop.0:
161+
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
162+
; CHECK-NEXT: Successor(s): pred.srem
163+
164+
; CHECK: <xVFxUF> pred.srem: {
165+
; CHECK-NEXT: pred.srem.entry:
166+
; CHECK-NEXT: BRANCH-ON-MASK vp<%4>
167+
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
168+
; CHECK-NEXT: CondBit: vp<%4> (loop)
169+
170+
; CHECK: pred.srem.if:
171+
; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> (S->V)
172+
; CHECK-NEXT: Successor(s): pred.srem.continue
173+
174+
; CHECK: pred.srem.continue:
175+
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%rem>
176+
; CHECK-NEXT: No successors
177+
; CHECK-NEXT: }
178+
179+
; CHECK: loop.0.split:
180+
; CHECK-NEXT: WIDEN ir<%add> = add vp<%7>, ir<%recur.next>
181+
; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add>
182+
; CHECK-NEXT: EMIT vp<%10> = select vp<%4> ir<%and.red.next> ir<%and.red>
183+
; CHECK-NEXT: No successors
184+
; CHECK-NEXT: }
185+
;
186+
entry:
187+
br label %loop
188+
189+
loop:
190+
%recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ]
191+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
192+
%and.red = phi i32 [ 1234, %entry ], [ %and.red.next, %loop ]
193+
%rem = srem i32 %recur, %x
194+
%recur.next = sext i8 %y to i32
195+
%add = add i32 %rem, %recur.next
196+
%and.red.next = and i32 %and.red, %add
197+
%iv.next = add nsw i32 %iv, 1
198+
%ec = icmp eq i32 %iv.next, 20001
199+
br i1 %ec, label %exit, label %loop
200+
201+
exit:
202+
%res = phi i32 [ %and.red.next, %loop ]
203+
ret i32 %res
204+
}

0 commit comments

Comments
 (0)