Skip to content

Commit f6de6d8

Browse files
committed
[VPlan] Introduce child regions as VPlan transform.
Further simplify the VPlan CFG builder by moving the introduction of inner regions to a VPlan transform, building on #128419. The HCFG builder now only constructs plain CFGs. I will move it to VPlanConstruction as a follow-up.
1 parent 7c4ef49 commit f6de6d8

File tree

7 files changed

+59
-117
lines changed

7 files changed

+59
-117
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -9314,10 +9314,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93149314
Range);
93159315
auto Plan = std::make_unique<VPlan>(OrigLoop);
93169316
// Build hierarchical CFG.
9317-
// Convert to VPlan-transform and consolidate all transforms for VPlan
9317+
// TODO: Convert to VPlan-transform and consolidate all transforms for VPlan
93189318
// creation.
93199319
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
9320-
HCFGBuilder.buildHierarchicalCFG();
9320+
HCFGBuilder.buildPlainCFG();
93219321

93229322
VPlanTransforms::introduceTopLevelVectorLoopRegion(
93239323
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
@@ -9621,7 +9621,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
96219621
auto Plan = std::make_unique<VPlan>(OrigLoop);
96229622
// Build hierarchical CFG
96239623
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
9624-
HCFGBuilder.buildHierarchicalCFG();
9624+
HCFGBuilder.buildPlainCFG();
96259625

96269626
VPlanTransforms::introduceTopLevelVectorLoopRegion(
96279627
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

+33
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,43 @@
1414
#include "LoopVectorizationPlanner.h"
1515
#include "VPlan.h"
1616
#include "VPlanCFG.h"
17+
#include "VPlanDominatorTree.h"
1718
#include "VPlanTransforms.h"
1819
#include "llvm/Analysis/LoopInfo.h"
1920
#include "llvm/Analysis/ScalarEvolution.h"
2021

2122
using namespace llvm;
2223

24+
/// Introduce VPRegionBlocks for each loop modeled using a plain CFG in \p Plan.
25+
static void introduceInnerLoopRegions(VPlan &Plan) {
26+
VPDominatorTree VPDT;
27+
VPDT.recalculate(Plan);
28+
29+
for (VPBlockBase *HeaderVPBB :
30+
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
31+
if (HeaderVPBB->getNumPredecessors() != 2)
32+
continue;
33+
VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0];
34+
VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1];
35+
if (!VPDT.dominates(HeaderVPBB, LatchVPBB))
36+
continue;
37+
assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) &&
38+
"preheader must dominate header");
39+
VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
40+
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB);
41+
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
42+
VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
43+
44+
auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
45+
false /*isReplicator*/);
46+
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
47+
VPBB->setParent(R);
48+
49+
VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
50+
VPBlockUtils::connectBlocks(R, Succ);
51+
}
52+
}
53+
2354
void VPlanTransforms::introduceTopLevelVectorLoopRegion(
2455
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
2556
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
@@ -98,4 +129,6 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
98129
ScalarLatchTerm->getDebugLoc(), "cmp.n");
99130
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
100131
ScalarLatchTerm->getDebugLoc());
132+
133+
introduceInnerLoopRegions(Plan);
101134
}

llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp

+10-94
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@
1212
/// components and steps:
1313
//
1414
/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
15-
/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
16-
/// Region) is created to enclose and serve as parent of all the VPBasicBlocks
17-
/// in the plain CFG.
15+
/// faithfully represents the CFG in the incoming IR.
1816
/// NOTE: At this point, there is a direct correspondence between all the
1917
/// VPBasicBlocks created for the initial plain CFG and the incoming
2018
/// BasicBlocks. However, this might change in the future.
@@ -57,12 +55,8 @@ class PlainCFGBuilder {
5755
// Hold phi nodes that need to be fixed once the plain CFG has been built.
5856
SmallVector<PHINode *, 8> PhisToFix;
5957

60-
/// Maps loops in the original IR to their corresponding region.
61-
DenseMap<Loop *, VPRegionBlock *> Loop2Region;
62-
6358
// Utility functions.
6459
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
65-
void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
6660
void fixHeaderPhis();
6761
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
6862
#ifndef NDEBUG
@@ -83,25 +77,6 @@ class PlainCFGBuilder {
8377
// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
8478
// must have no predecessors.
8579
void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
86-
auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
87-
auto *SinglePred = BB->getSinglePredecessor();
88-
Loop *LoopForBB = LI->getLoopFor(BB);
89-
if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
90-
return nullptr;
91-
// The input IR must be in loop-simplify form, ensuring a single predecessor
92-
// for exit blocks.
93-
assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
94-
"SinglePred must be the only loop latch");
95-
return SinglePred;
96-
};
97-
if (auto *LatchBB = GetLatchOfExit(BB)) {
98-
auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
99-
assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
100-
"successor must already be set for PredRegion; it must have VPBB "
101-
"as single successor");
102-
VPBB->setPredecessors({PredRegion});
103-
return;
104-
}
10580
// Collect VPBB predecessors.
10681
SmallVector<VPBlockBase *, 2> VPBBPreds;
10782
for (BasicBlock *Pred : predecessors(BB))
@@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) {
11388
return L && BB == L->getHeader();
11489
}
11590

116-
void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
117-
BasicBlock *BB) {
118-
// BB is a loop header block. Connect the region to the loop preheader.
119-
Loop *LoopOfBB = LI->getLoopFor(BB);
120-
Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
121-
}
122-
12391
// Add operands to VPInstructions representing phi nodes from the input IR.
12492
void PlainCFGBuilder::fixHeaderPhis() {
12593
for (auto *Phi : PhisToFix) {
@@ -150,19 +118,6 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) {
150118
return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
151119
}
152120

153-
/// Return true if loop \p L is contained within \p OuterLoop.
154-
static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
155-
if (L->getLoopDepth() < OuterLoop->getLoopDepth())
156-
return false;
157-
const Loop *P = L;
158-
while (P) {
159-
if (P == OuterLoop)
160-
return true;
161-
P = P->getParentLoop();
162-
}
163-
return false;
164-
}
165-
166121
// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
167122
// corresponding to the containing loop or retrieve an existing one if it was
168123
// already created. If no region exists yet for the loop containing \p BB, a new
@@ -178,28 +133,6 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
178133
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
179134
VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
180135
BB2VPBB[BB] = VPBB;
181-
182-
// Get or create a region for the loop containing BB, except for the top
183-
// region of TheLoop which is created later.
184-
Loop *LoopOfBB = LI->getLoopFor(BB);
185-
if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop))
186-
return VPBB;
187-
188-
auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
189-
if (!isHeaderBB(BB, LoopOfBB)) {
190-
assert(RegionOfVPBB &&
191-
"Region should have been created by visiting header earlier");
192-
VPBB->setParent(RegionOfVPBB);
193-
return VPBB;
194-
}
195-
196-
assert(!RegionOfVPBB &&
197-
"First visit of a header basic block expects to register its region.");
198-
// Handle a header - take care of its Region.
199-
RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/);
200-
RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
201-
RegionOfVPBB->setEntry(VPBB);
202-
Loop2Region[LoopOfBB] = RegionOfVPBB;
203136
return VPBB;
204137
}
205138

@@ -376,15 +309,13 @@ void PlainCFGBuilder::buildPlainCFG(
376309
for (BasicBlock *BB : RPO) {
377310
// Create or retrieve the VPBasicBlock for this BB.
378311
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
379-
VPRegionBlock *Region = VPBB->getParent();
380312
Loop *LoopForBB = LI->getLoopFor(BB);
381313
// Set VPBB predecessors in the same order as they are in the incoming BB.
382314
if (!isHeaderBB(BB, LoopForBB)) {
383315
setVPBBPredsFromBB(VPBB, BB);
384-
} else if (Region) {
385-
// BB is a loop header and there's a corresponding region, set the
386-
// predecessor for it.
387-
setRegionPredsFromBB(Region, BB);
316+
} else if (LoopForBB != TheLoop) {
317+
VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()),
318+
getOrCreateVPBB(LoopForBB->getLoopLatch())});
388319
}
389320

390321
// Create VPInstructions for BB.
@@ -423,21 +354,11 @@ void PlainCFGBuilder::buildPlainCFG(
423354
BasicBlock *IRSucc1 = BI->getSuccessor(1);
424355
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
425356
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
426-
if (BB == LoopForBB->getLoopLatch()) {
427-
// For a latch we need to set the successor of the region rather than that
428-
// of VPBB and it should be set to the exit, i.e., non-header successor,
429-
// except for the top region, which is handled elsewhere.
430-
assert(LoopForBB != TheLoop &&
431-
"Latch of the top region should have been handled earlier");
432-
Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
433-
: Successor0);
434-
Region->setExiting(VPBB);
435-
continue;
436-
}
437357

438-
// Don't connect any blocks outside the current loop except the latch for
439-
// now. The latch is handled above.
440-
if (LoopForBB) {
358+
// Don't connect any blocks outside the current loop except the latch, which
359+
// is handled below.
360+
if (LoopForBB &&
361+
(LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
441362
if (!LoopForBB->contains(IRSucc0)) {
442363
VPBB->setOneSuccessor(Successor1);
443364
continue;
@@ -461,16 +382,11 @@ void PlainCFGBuilder::buildPlainCFG(
461382

462383
for (const auto &[IRBB, VPB] : BB2VPBB)
463384
VPB2IRBB[VPB] = IRBB;
385+
386+
LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
464387
}
465388

466389
void VPlanHCFGBuilder::buildPlainCFG() {
467390
PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
468391
PCFGBuilder.buildPlainCFG(VPB2IRBB);
469392
}
470-
471-
// Public interface to build a H-CFG.
472-
void VPlanHCFGBuilder::buildHierarchicalCFG() {
473-
// Build Top Region enclosing the plain CFG.
474-
buildPlainCFG();
475-
LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
476-
}

llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h

+2-6
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ namespace llvm {
3030

3131
class Loop;
3232
class LoopInfo;
33-
class VPRegionBlock;
3433
class VPlan;
3534
class VPlanTestIRBase;
3635
class VPBlockBase;
@@ -54,15 +53,12 @@ class VPlanHCFGBuilder {
5453
/// created for a input IR basic block.
5554
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
5655

57-
/// Build a plain CFG for TheLoop and connect it to Plan's entry.
58-
void buildPlainCFG();
59-
6056
public:
6157
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
6258
: TheLoop(Lp), LI(LI), Plan(P) {}
6359

64-
/// Build H-CFG for TheLoop and update Plan accordingly.
65-
void buildHierarchicalCFG();
60+
/// Build a plain CFG for TheLoop and connect it to Plan's entry.
61+
void buildPlainCFG();
6662

6763
/// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
6864
/// there is no such corresponding block.

llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll

+9-12
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
@arr = external global [8 x [8 x i64]], align 16
77

88
define void @foo(i64 %n) {
9-
; CHECK: VPlan 'HCFGBuilder: Plain CFG
9+
; CHECK: VPlan 'Plain CFG
1010
; CHECK-NEXT: {
1111
; CHECK-EMPTY:
1212
; CHECK-NEXT: ir-bb<entry>:
@@ -19,17 +19,14 @@ define void @foo(i64 %n) {
1919
; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
2020
; CHECK-NEXT: Successor(s): inner
2121
; CHECK-EMPTY:
22-
; CHECK-NEXT: <x1> inner: {
23-
; CHECK-NEXT: inner:
24-
; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
25-
; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
26-
; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
27-
; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
28-
; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
29-
; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
30-
; CHECK-NEXT: No successors
31-
; CHECK-NEXT: }
32-
; CHECK-NEXT: Successor(s): outer.latch
22+
; CHECK-NEXT: inner:
23+
; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
24+
; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
25+
; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
26+
; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
27+
; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
28+
; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
29+
; CHECK-NEXT: Successor(s): outer.latch, inner
3330
; CHECK-EMPTY:
3431
; CHECK-NEXT: outer.latch:
3532
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>

llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
; Verify that the stress testing flag for the VPlan H-CFG builder works as
55
; expected with and without enabling the VPlan H-CFG Verifier.
66

7-
; CHECK: VPlan 'HCFGBuilder: Plain CFG
7+
; CHECK: VPlan 'Plain CFG
88

99
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
1010

llvm/unittests/Transforms/Vectorize/VPlanTestBase.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class VPlanTestIRBase : public testing::Test {
7373
PredicatedScalarEvolution PSE(*SE, *L);
7474
auto Plan = std::make_unique<VPlan>(L);
7575
VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
76-
HCFGBuilder.buildHierarchicalCFG();
76+
HCFGBuilder.buildPlainCFG();
7777
VPlanTransforms::introduceTopLevelVectorLoopRegion(
7878
*Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L);
7979
return Plan;

0 commit comments

Comments
 (0)