Skip to content

Commit d5ba9a3

Browse files
committed
[VPlan] Introduce child regions as VPlan transform.
Further simplify the VPlan CFG builder by moving the introduction of inner regions to a VPlan transform, building on llvm#128419. The HCFG builder now only constructs plain CFGs. I will move it to VPlanConstruction as a follow-up.
1 parent 424c8f9 commit d5ba9a3

File tree

7 files changed

+79
-141
lines changed

7 files changed

+79
-141
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -9308,10 +9308,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93089308
Range);
93099309
auto Plan = std::make_unique<VPlan>(OrigLoop);
93109310
// Build hierarchical CFG.
9311-
// Convert to VPlan-transform and consoliate all transforms for VPlan
9311+
// TODO: Convert to VPlan-transform and consolidate all transforms for VPlan
93129312
// creation.
93139313
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
9314-
HCFGBuilder.buildHierarchicalCFG();
9314+
HCFGBuilder.buildPlainCFG();
93159315

93169316
VPlanTransforms::introduceTopLevelVectorLoopRegion(
93179317
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
@@ -9615,7 +9615,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
96159615
auto Plan = std::make_unique<VPlan>(OrigLoop);
96169616
// Build hierarchical CFG
96179617
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
9618-
HCFGBuilder.buildHierarchicalCFG();
9618+
HCFGBuilder.buildPlainCFG();
96199619

96209620
VPlanTransforms::introduceTopLevelVectorLoopRegion(
96219621
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

+48-21
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,57 @@
1414
#include "LoopVectorizationPlanner.h"
1515
#include "VPlan.h"
1616
#include "VPlanCFG.h"
17+
#include "VPlanDominatorTree.h"
1718
#include "VPlanTransforms.h"
1819
#include "llvm/Analysis/LoopInfo.h"
1920
#include "llvm/Analysis/ScalarEvolution.h"
2021

2122
using namespace llvm;
2223

24+
/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB, if
25+
/// it is a header of a loop.
26+
static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB,
27+
VPDominatorTree &VPDT) {
28+
if (HeaderVPBB->getNumPredecessors() != 2)
29+
return nullptr;
30+
VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0];
31+
VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1];
32+
if (!VPDT.dominates(HeaderVPBB, LatchVPBB))
33+
return nullptr;
34+
assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) &&
35+
"preheader must dominate header");
36+
VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
37+
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB);
38+
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
39+
if (Succ)
40+
VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
41+
42+
auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
43+
false /*isReplicator*/);
44+
// All VPBBs reachable shallowly from HeaderVPBB belong to top level loop,
45+
// because VPlan is expected to end at top level latch.
46+
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
47+
VPBB->setParent(R);
48+
49+
VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
50+
if (Succ)
51+
VPBlockUtils::connectBlocks(R, Succ);
52+
return R;
53+
}
54+
2355
void VPlanTransforms::introduceTopLevelVectorLoopRegion(
2456
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
2557
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
26-
// TODO: Generalize to introduce all loop regions.
27-
auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
28-
VPBlockUtils::disconnectBlocks(Plan.getEntry(), HeaderVPBB);
58+
VPDominatorTree VPDT;
59+
VPDT.recalculate(Plan);
2960

30-
VPBasicBlock *OriginalLatch =
31-
cast<VPBasicBlock>(HeaderVPBB->getSinglePredecessor());
32-
VPBlockUtils::disconnectBlocks(OriginalLatch, HeaderVPBB);
33-
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
34-
VPBlockUtils::connectBlocks(Plan.getEntry(), VecPreheader);
35-
assert(OriginalLatch->getNumSuccessors() == 0 &&
36-
"Plan should end at top level latch");
61+
auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
62+
VPRegionBlock *TopRegion = introduceRegion(Plan, HeaderVPBB, VPDT);
63+
auto *OrigExiting = TopRegion->getExiting();
64+
VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
65+
VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
66+
TopRegion->setExiting(LatchVPBB);
67+
TopRegion->setName("vector loop");
3768

3869
// Create SCEV and VPValue for the trip count.
3970
// We use the symbolic max backedge-taken-count, which works also when
@@ -47,18 +78,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
4778
Plan.setTripCount(
4879
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));
4980

50-
// Create VPRegionBlock, with existing header and new empty latch block, to be
51-
// filled.
52-
VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
53-
VPBlockUtils::insertBlockAfter(LatchVPBB, OriginalLatch);
54-
auto *TopRegion = Plan.createVPRegionBlock(
55-
HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/);
56-
// All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
57-
// because VPlan is expected to end at top level latch.
58-
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
59-
VPBB->setParent(TopRegion);
81+
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
82+
VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
6083

61-
VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
6284
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
6385
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
6486

@@ -98,4 +120,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
98120
ScalarLatchTerm->getDebugLoc(), "cmp.n");
99121
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
100122
ScalarLatchTerm->getDebugLoc());
123+
124+
for (VPBlockBase *HeaderVPBB :
125+
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
126+
introduceRegion(Plan, HeaderVPBB, VPDT);
127+
}
101128
}

llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp

+15-97
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@
1212
/// components and steps:
1313
//
1414
/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
15-
/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
16-
/// Region) is created to enclose and serve as parent of all the VPBasicBlocks
17-
/// in the plain CFG.
15+
/// faithfully represents the CFG in the incoming IR.
1816
/// NOTE: At this point, there is a direct correspondence between all the
1917
/// VPBasicBlocks created for the initial plain CFG and the incoming
2018
/// BasicBlocks. However, this might change in the future.
@@ -57,12 +55,8 @@ class PlainCFGBuilder {
5755
// Hold phi nodes that need to be fixed once the plain CFG has been built.
5856
SmallVector<PHINode *, 8> PhisToFix;
5957

60-
/// Maps loops in the original IR to their corresponding region.
61-
DenseMap<Loop *, VPRegionBlock *> Loop2Region;
62-
6358
// Utility functions.
6459
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
65-
void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
6660
void fixHeaderPhis();
6761
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
6862
#ifndef NDEBUG
@@ -83,25 +77,6 @@ class PlainCFGBuilder {
8377
// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
8478
// must have no predecessors.
8579
void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
86-
auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
87-
auto *SinglePred = BB->getSinglePredecessor();
88-
Loop *LoopForBB = LI->getLoopFor(BB);
89-
if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
90-
return nullptr;
91-
// The input IR must be in loop-simplify form, ensuring a single predecessor
92-
// for exit blocks.
93-
assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
94-
"SinglePred must be the only loop latch");
95-
return SinglePred;
96-
};
97-
if (auto *LatchBB = GetLatchOfExit(BB)) {
98-
auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
99-
assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
100-
"successor must already be set for PredRegion; it must have VPBB "
101-
"as single successor");
102-
VPBB->setPredecessors({PredRegion});
103-
return;
104-
}
10580
// Collect VPBB predecessors.
10681
SmallVector<VPBlockBase *, 2> VPBBPreds;
10782
for (BasicBlock *Pred : predecessors(BB))
@@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) {
11388
return L && BB == L->getHeader();
11489
}
11590

116-
void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
117-
BasicBlock *BB) {
118-
// BB is a loop header block. Connect the region to the loop preheader.
119-
Loop *LoopOfBB = LI->getLoopFor(BB);
120-
Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
121-
}
122-
12391
// Add operands to VPInstructions representing phi nodes from the input IR.
12492
void PlainCFGBuilder::fixHeaderPhis() {
12593
for (auto *Phi : PhisToFix) {
@@ -150,19 +118,6 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) {
150118
return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
151119
}
152120

153-
/// Return true of \p L loop is contained within \p OuterLoop.
154-
static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
155-
if (L->getLoopDepth() < OuterLoop->getLoopDepth())
156-
return false;
157-
const Loop *P = L;
158-
while (P) {
159-
if (P == OuterLoop)
160-
return true;
161-
P = P->getParentLoop();
162-
}
163-
return false;
164-
}
165-
166121
// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
167122
// corresponding to the containing loop or retrieve an existing one if it was
168123
// already created. If no region exists yet for the loop containing \p BB, a new
@@ -178,28 +133,6 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
178133
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
179134
VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
180135
BB2VPBB[BB] = VPBB;
181-
182-
// Get or create a region for the loop containing BB, except for the top
183-
// region of TheLoop which is created later.
184-
Loop *LoopOfBB = LI->getLoopFor(BB);
185-
if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop))
186-
return VPBB;
187-
188-
auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
189-
if (!isHeaderBB(BB, LoopOfBB)) {
190-
assert(RegionOfVPBB &&
191-
"Region should have been created by visiting header earlier");
192-
VPBB->setParent(RegionOfVPBB);
193-
return VPBB;
194-
}
195-
196-
assert(!RegionOfVPBB &&
197-
"First visit of a header basic block expects to register its region.");
198-
// Handle a header - take care of its Region.
199-
RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/);
200-
RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
201-
RegionOfVPBB->setEntry(VPBB);
202-
Loop2Region[LoopOfBB] = RegionOfVPBB;
203136
return VPBB;
204137
}
205138

@@ -351,6 +284,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
351284
// Main interface to build the plain CFG.
352285
void PlainCFGBuilder::buildPlainCFG(
353286
DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
287+
VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
288+
BB2VPBB[Entry->getIRBasicBlock()] = Entry;
354289

355290
// 1. Scan the body of the loop in a topological order to visit each basic
356291
// block after having visited its predecessor basic blocks. Create a VPBB for
@@ -376,23 +311,21 @@ void PlainCFGBuilder::buildPlainCFG(
376311
for (BasicBlock *BB : RPO) {
377312
// Create or retrieve the VPBasicBlock for this BB.
378313
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
379-
VPRegionBlock *Region = VPBB->getParent();
380314
Loop *LoopForBB = LI->getLoopFor(BB);
381315
// Set VPBB predecessors in the same order as they are in the incoming BB.
382316
if (!isHeaderBB(BB, LoopForBB)) {
383317
setVPBBPredsFromBB(VPBB, BB);
384-
} else if (Region) {
385-
// BB is a loop header and there's a corresponding region, set the
386-
// predecessor for it.
387-
setRegionPredsFromBB(Region, BB);
318+
} else {
319+
VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()),
320+
getOrCreateVPBB(LoopForBB->getLoopLatch())});
388321
}
389322

390323
// Create VPInstructions for BB.
391324
createVPInstructionsForVPBB(VPBB, BB);
392325

393326
if (BB == TheLoop->getLoopLatch()) {
394327
VPBasicBlock *HeaderVPBB = getOrCreateVPBB(LoopForBB->getHeader());
395-
VPBlockUtils::connectBlocks(VPBB, HeaderVPBB);
328+
VPBB->setOneSuccessor(HeaderVPBB);
396329
continue;
397330
}
398331

@@ -423,21 +356,11 @@ void PlainCFGBuilder::buildPlainCFG(
423356
BasicBlock *IRSucc1 = BI->getSuccessor(1);
424357
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
425358
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
426-
if (BB == LoopForBB->getLoopLatch()) {
427-
// For a latch we need to set the successor of the region rather than that
428-
// of VPBB and it should be set to the exit, i.e., non-header successor,
429-
// except for the top region, which is handled elsewhere.
430-
assert(LoopForBB != TheLoop &&
431-
"Latch of the top region should have been handled earlier");
432-
Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
433-
: Successor0);
434-
Region->setExiting(VPBB);
435-
continue;
436-
}
437359

438-
// Don't connect any blocks outside the current loop except the latch for
439-
// now. The latch is handled above.
440-
if (LoopForBB) {
360+
// Don't connect any blocks outside the current loop except the latch, which
361+
// is handled below.
362+
if (LoopForBB &&
363+
(LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
441364
if (!LoopForBB->contains(IRSucc0)) {
442365
VPBB->setOneSuccessor(Successor1);
443366
continue;
@@ -456,21 +379,16 @@ void PlainCFGBuilder::buildPlainCFG(
456379
// corresponding VPlan operands.
457380
fixHeaderPhis();
458381

459-
VPBlockUtils::connectBlocks(Plan.getEntry(),
460-
getOrCreateVPBB(TheLoop->getHeader()));
382+
Plan.getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
383+
Plan.getEntry()->setPlan(&Plan);
461384

462385
for (const auto &[IRBB, VPB] : BB2VPBB)
463386
VPB2IRBB[VPB] = IRBB;
387+
388+
LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
464389
}
465390

466391
void VPlanHCFGBuilder::buildPlainCFG() {
467392
PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
468393
PCFGBuilder.buildPlainCFG(VPB2IRBB);
469394
}
470-
471-
// Public interface to build a H-CFG.
472-
void VPlanHCFGBuilder::buildHierarchicalCFG() {
473-
// Build Top Region enclosing the plain CFG.
474-
buildPlainCFG();
475-
LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
476-
}

llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h

+2-6
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ namespace llvm {
3030

3131
class Loop;
3232
class LoopInfo;
33-
class VPRegionBlock;
3433
class VPlan;
3534
class VPlanTestIRBase;
3635
class VPBlockBase;
@@ -54,15 +53,12 @@ class VPlanHCFGBuilder {
5453
/// created for an input IR basic block.
5554
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
5655

57-
/// Build plain CFG for TheLoop and connects it to Plan's entry.
58-
void buildPlainCFG();
59-
6056
public:
6157
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
6258
: TheLoop(Lp), LI(LI), Plan(P) {}
6359

64-
/// Build H-CFG for TheLoop and update Plan accordingly.
65-
void buildHierarchicalCFG();
60+
/// Build plain CFG for TheLoop and connect it to Plan's entry.
61+
void buildPlainCFG();
6662

6763
/// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
6864
/// there is no such corresponding block.

llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll

+9-12
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
@arr = external global [8 x [8 x i64]], align 16
77

88
define void @foo(i64 %n) {
9-
; CHECK: VPlan 'HCFGBuilder: Plain CFG
9+
; CHECK: VPlan 'Plain CFG
1010
; CHECK-NEXT: {
1111
; CHECK-EMPTY:
1212
; CHECK-NEXT: ir-bb<entry>:
@@ -19,17 +19,14 @@ define void @foo(i64 %n) {
1919
; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
2020
; CHECK-NEXT: Successor(s): inner
2121
; CHECK-EMPTY:
22-
; CHECK-NEXT: <x1> inner: {
23-
; CHECK-NEXT: inner:
24-
; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
25-
; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
26-
; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
27-
; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
28-
; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
29-
; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
30-
; CHECK-NEXT: No successors
31-
; CHECK-NEXT: }
32-
; CHECK-NEXT: Successor(s): outer.latch
22+
; CHECK-NEXT: inner:
23+
; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
24+
; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
25+
; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
26+
; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
27+
; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
28+
; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
29+
; CHECK-NEXT: Successor(s): outer.latch, inner
3330
; CHECK-EMPTY:
3431
; CHECK-NEXT: outer.latch:
3532
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>

llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
; Verify that the stress testing flag for the VPlan H-CFG builder works as
55
; expected with and without enabling the VPlan H-CFG Verifier.
66

7-
; CHECK: VPlan 'HCFGBuilder: Plain CFG
7+
; CHECK: VPlan 'Plain CFG
88

99
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
1010

0 commit comments

Comments
 (0)