Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9308,10 +9308,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Range);
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG.
// Convert to VPlan-transform and consoliate all transforms for VPlan
// TODO: Convert to VPlan-transform and consoliate all transforms for VPlan
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// TODO: Convert to VPlan-transform and consoliate all transforms for VPlan
// TODO: Convert to VPlan-transform and consolidate all transforms for VPlan

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks!

// creation.
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildHierarchicalCFG();
HCFGBuilder.buildPlainCFG();

VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
Expand Down Expand Up @@ -9615,7 +9615,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildHierarchicalCFG();
HCFGBuilder.buildPlainCFG();

VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
Expand Down
69 changes: 48 additions & 21 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,57 @@
#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanTransforms.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

/// Create and return a new VPRegionBlock for loop starting at \p HeaderVPBB, if
/// it is a header of a loop.
static VPRegionBlock *introduceRegion(VPlan &Plan, VPBlockBase *HeaderVPBB,
VPDominatorTree &VPDT) {
if (HeaderVPBB->getNumPredecessors() != 2)
return nullptr;
VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0];
VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1];
if (!VPDT.dominates(HeaderVPBB, LatchVPBB))
return nullptr;
assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) &&
"preheader must dominate header");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check if isHeaderVPBB() separately, based on dominance for flat cfg rather than enclosing region for HCFG, and have introduceRegion() return non-null region if so?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the code to check for header blocks to VPBasicBlock::isHeader and check it before calling introduceRegion, thanks

VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB);
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
assert(LatchVPBB->getNumSuccessors() <= 1 && "Latch has more than one successor");
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

if (Succ)
VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);

auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
false /*isReplicator*/);
// All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
// because VPlan is expected to end at top level latch.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// because VPlan is expected to end at top level latch.
// because VPlan is expected to end at top level latch disconnected above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks!

for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
VPBB->setParent(R);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about setting the parent of the newly introduced region itself?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!


VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
if (Succ)
VPBlockUtils::connectBlocks(R, Succ);
return R;
}

void VPlanTransforms::introduceTopLevelVectorLoopRegion(
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
// TODO: Generalize to introduce all loop regions.
auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
VPBlockUtils::disconnectBlocks(Plan.getEntry(), HeaderVPBB);
VPDominatorTree VPDT;
VPDT.recalculate(Plan);

VPBasicBlock *OriginalLatch =
cast<VPBasicBlock>(HeaderVPBB->getSinglePredecessor());
VPBlockUtils::disconnectBlocks(OriginalLatch, HeaderVPBB);
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
VPBlockUtils::connectBlocks(Plan.getEntry(), VecPreheader);
assert(OriginalLatch->getNumSuccessors() == 0 &&
"Plan should end at top level latch");
auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
VPRegionBlock *TopRegion = introduceRegion(Plan, HeaderVPBB, VPDT);
auto *OrigExiting = TopRegion->getExiting();
VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
TopRegion->setExiting(LatchVPBB);
Comment on lines +93 to +96
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this additional latch block really needed, or can it be removed (independently)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is unsed only for convenience, when adjusting reductions it is used to place selects there if needed. I'll look into remove it separartely.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps worth a comment, along with explaining that this section assigns distinct names to the (just created) Top Region and its latch block, which is introduced as a convenience.
Can also reset the name of Top Region's header to "vector.body" here, instead of setting it during plain CFG construction?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

TopRegion->setName("vector loop");

// Create SCEV and VPValue for the trip count.
// We use the symbolic max backedge-taken-count, which works also when
Expand All @@ -47,18 +78,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
Plan.setTripCount(
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));

// Create VPRegionBlock, with existing header and new empty latch block, to be
// filled.
VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, OriginalLatch);
auto *TopRegion = Plan.createVPRegionBlock(
HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/);
// All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
// because VPlan is expected to end at top level latch.
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
VPBB->setParent(TopRegion);
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());

VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);

Expand Down Expand Up @@ -98,4 +120,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());

for (VPBlockBase *HeaderVPBB :
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Introduce nested regions also before bailing out above when a scalar epilog check is not required?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved up the loop to create all regions via the loop (also the top-most).

vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cfg is flat so both shallow and deep traverse all blocks?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep.

introduceRegion(Plan, HeaderVPBB, VPDT);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are nested regions introduced from outermost in? Is the parent of an inner region set to its enclosing region?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are created from outermost to innermost. Updated to also set the parents of the created regions, thanks

}
}
112 changes: 15 additions & 97 deletions llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
/// components and steps:
//
/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
/// Region) is created to enclose and serve as parent of all the VPBasicBlocks
/// in the plain CFG.
/// faithfully represents the CFG in the incoming IR.
/// NOTE: At this point, there is a direct correspondence between all the
/// VPBasicBlocks created for the initial plain CFG and the incoming
/// BasicBlocks. However, this might change in the future.
Expand Down Expand Up @@ -57,12 +55,8 @@ class PlainCFGBuilder {
// Hold phi node's that need to be fixed once the plain CFG has been built.
SmallVector<PHINode *, 8> PhisToFix;

/// Maps loops in the original IR to their corresponding region.
DenseMap<Loop *, VPRegionBlock *> Loop2Region;

// Utility functions.
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
void fixHeaderPhis();
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
#ifndef NDEBUG
Expand All @@ -83,25 +77,6 @@ class PlainCFGBuilder {
// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
// must have no predecessors.
void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
auto *SinglePred = BB->getSinglePredecessor();
Loop *LoopForBB = LI->getLoopFor(BB);
if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
return nullptr;
// The input IR must be in loop-simplify form, ensuring a single predecessor
// for exit blocks.
assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
"SinglePred must be the only loop latch");
return SinglePred;
};
if (auto *LatchBB = GetLatchOfExit(BB)) {
auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
"successor must already be set for PredRegion; it must have VPBB "
"as single successor");
VPBB->setPredecessors({PredRegion});
return;
}
// Collect VPBB predecessors.
SmallVector<VPBlockBase *, 2> VPBBPreds;
for (BasicBlock *Pred : predecessors(BB))
Expand All @@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) {
return L && BB == L->getHeader();
}

void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
BasicBlock *BB) {
// BB is a loop header block. Connect the region to the loop preheader.
Loop *LoopOfBB = LI->getLoopFor(BB);
Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
}

// Add operands to VPInstructions representing phi nodes from the input IR.
void PlainCFGBuilder::fixHeaderPhis() {
for (auto *Phi : PhisToFix) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Header phi's expect preheader as first predecessor and latch as second, corresponding to operands added below, so make sure here the predecessors are ordered accordingly, i.e., swap if needed? Later when loop regions are introduced, can rely on this order.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated the patch to use the original order here and swap if needed when region is introduced.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But here it's easy to set (both) things right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previosuly it wasnt consistent, but with the current patch it is; both use the order from the CFG in the current version and are a 1-1 translation of the CFG.

In the current patch, the canonicalization of header phi operand and header predecessor order is done completely separate from CFG construction, when the region is introduced.

In general, I think we should move away from directly relying on the operands to retrieve incoming values and move towards an interface getIncomingValueFor(VPB), which takes care of checking what the index for VPB is.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Responding to some comments inline regarding phi/predecessor ordering to start with, to clarify. Would be good to decide first where we want the canonicalization of phi/predecessors to happen.

Doing it separately from initial VPlan construction has the benefit of reducing the number of special cases needed during initial construction and shifts the complexity of detecting the preheader/latch to a VPlan transform, currently integrated with introducing regions, but could also be a separate canonicalization.

Ah, ok, I see. Sketching how separate canonicalization may look like, for clarify and to help reason about what if anything should be fused:

Let VPlanCFG0 denote the initial VPlan, constructed from IR. VPlanCFG0 may indeed be a consistent 1-1 translation of the CFG and IR, arguably with all its branches - including early-exits and latch-to-exit (and perhaps unconditional branches too), possibly also representing the BasicBlock operands of phi's and branches as VPValues wrapping these Values, as operands of the corresponding VPIRInstructions.

Let VPlanCFG1 denote the result of canonicalizing VPlanCFG0's phi's and branches according to predecessors/successors order (instead of their BasicBlock operands). It may be better to change the types of recipes used, even when the original predecessor/successor order is retained, to clarify that they belong to distinct VPlan "modes" or "stages". Early-exit branch recipes and successors can be eliminated now.

Let VPlanHCFG0 denote the (first) HCFG VPlan produced by a VPlan transform devoted to canonical-CFG --> HCFG conversion that introduces child (or actually all loop) regions into VPlanCFG1. Would this transform need to update/change any recipes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let VPlanCFG0 denote the initial VPlan, constructed from IR. VPlanCFG0 may indeed be a consistent 1-1 translation of the CFG and IR, arguably with all its branches - including early-exits and latch-to-exit (and perhaps unconditional branches too), possibly also representing the BasicBlock operands of phi's and branches as VPValues wrapping these Values, as operands of the corresponding VPIRInstructions.

Yep, I have some follow-up patches to retain the exit branches and keep the exit blocks connected.

In a sense, the specific operand order (1st op = preheader, 2nd op = latch) for header phis is purely a consequence of introducing a region, which itself only has a single predecessor I think. So introducing the canonical order at this point seems like a good fit; the header phis are the only ones which need their order adjusted.

Using a separate recipe for the initial recipes sounds good, something like VPInstruction which always has an underlying instruction would be good, but already models the operands/defs in VPlan via operands. Unfortunately VPIRInstruction is already taken. We would need a version that defines result VPValues and uses VPvalue as operands.

Expand Down Expand Up @@ -150,19 +118,6 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) {
return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
}

/// Return true of \p L loop is contained within \p OuterLoop.
static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
if (L->getLoopDepth() < OuterLoop->getLoopDepth())
return false;
const Loop *P = L;
while (P) {
if (P == OuterLoop)
return true;
P = P->getParentLoop();
}
return false;
}

// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
// corresponding to the containing loop or retrieve an existing one if it was
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The region/loop part deserves updating/removal.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks.

// already created. If no region exists yet for the loop containing \p BB, a new
Expand All @@ -178,28 +133,6 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
BB2VPBB[BB] = VPBB;

// Get or create a region for the loop containing BB, except for the top
// region of TheLoop which is created later.
Loop *LoopOfBB = LI->getLoopFor(BB);
if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop))
return VPBB;

auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
if (!isHeaderBB(BB, LoopOfBB)) {
assert(RegionOfVPBB &&
"Region should have been created by visiting header earlier");
VPBB->setParent(RegionOfVPBB);
return VPBB;
}

assert(!RegionOfVPBB &&
"First visit of a header basic block expects to register its region.");
// Handle a header - take care of its Region.
RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/);
RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
RegionOfVPBB->setEntry(VPBB);
Loop2Region[LoopOfBB] = RegionOfVPBB;
return VPBB;
}

Expand Down Expand Up @@ -351,6 +284,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
// Main interface to build the plain CFG.
void PlainCFGBuilder::buildPlainCFG(
DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan.getEntry());
BB2VPBB[Entry->getIRBasicBlock()] = Entry;

// 1. Scan the body of the loop in a topological order to visit each basic
// block after having visited its predecessor basic blocks. Create a VPBB for
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo below: incomming

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will adjust separately.

Expand All @@ -376,23 +311,21 @@ void PlainCFGBuilder::buildPlainCFG(
for (BasicBlock *BB : RPO) {
// Create or retrieve the VPBasicBlock for this BB.
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
VPRegionBlock *Region = VPBB->getParent();
Loop *LoopForBB = LI->getLoopFor(BB);
// Set VPBB predecessors in the same order as they are in the incoming BB.
if (!isHeaderBB(BB, LoopForBB)) {
setVPBBPredsFromBB(VPBB, BB);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can setVPBBPredsFromBB() also handle header blocks?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, the code relies on the header having a specific predecessor order (preheader, then latch). We might be able to remove this restriction, because the dominance based approach could be generalized to handle it. It would require extra logic to determine which block is the preheader/latch.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just pushed an update to not rely on the predecessor order (see cc81801 for this change in isolation)

} else if (Region) {
// BB is a loop header and there's a corresponding region, set the
// predecessor for it.
setRegionPredsFromBB(Region, BB);
} else {
VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()),
getOrCreateVPBB(LoopForBB->getLoopLatch())});
}

// Create VPInstructions for BB.
createVPInstructionsForVPBB(VPBB, BB);

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can VPBB's successors be set according to BB's successors, in all cases, similar to predecessors. Or according to case order for switches, and true/false for conditional branches, but independent of loop latch or not?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above, we could , with extra logic when introducing regions to figure out what successor is the exit block and which one the header.

Should be doable, but might be better separately?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assumptions about successors and successor order should be consistent with terminating recipes that target them, including the removal of early-exit branches/successors. Perhaps similar to [header]phisToFix/fixHeaderPhis() that should arguably post-process predecessors to align with phi recipes as they are fixed, after first mirroring the IR predecessors as-is, a post-processing of VPIRInstructions that first wrap IR terminals should take care of both converting them into BranchOnCond/Switch VP[nonIR]Instructions along with possible swapping of successors, plus removal of early-exit branches? Can be done separately if preferred.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the patch to make the phi operand order consistent with predecessors.

At the moment,connections to exit blocks are dropped, but they should be retained as follow up (initial patch here 317d975 )

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, this may clarify some confusion raised elsewhere.

if (BB == TheLoop->getLoopLatch()) {
VPBasicBlock *HeaderVPBB = getOrCreateVPBB(LoopForBB->getHeader());
VPBlockUtils::connectBlocks(VPBB, HeaderVPBB);
VPBB->setOneSuccessor(HeaderVPBB);
continue;
}

Expand Down Expand Up @@ -423,21 +356,11 @@ void PlainCFGBuilder::buildPlainCFG(
BasicBlock *IRSucc1 = BI->getSuccessor(1);
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
if (BB == LoopForBB->getLoopLatch()) {
// For a latch we need to set the successor of the region rather than that
// of VPBB and it should be set to the exit, i.e., non-header successor,
// except for the top region, which is handled elsewhere.
assert(LoopForBB != TheLoop &&
"Latch of the top region should have been handled earlier");
Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
: Successor0);
Region->setExiting(VPBB);
continue;
}

// Don't connect any blocks outside the current loop except the latch for
// now. The latch is handled above.
if (LoopForBB) {
// Don't connect any blocks outside the current loop except the latch, which
// is handled below.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For TheLoop this now avoids connecting the latch to its exit block, left for skeleton/middle-block construction?
Intention is to remove early exit branches from all loops?
Latch was previously "handled above", now "handled below" - where, in following setTwoSuccessors()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ths should say for the latches of inner loops. Updated, thanks

if (LoopForBB &&
(LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is somewhat confusing: can LoopForBB be null, given that BB traverses all (and only) blocks of TheLoop?

BranchOnCond recipes are introduced for conditional branches (that reside inside the top loop) excluding that of the top loop latch, and if one destination resides outside top loop (suffice to check the latter only?). I.e., early-exits from the top loop are effectively removed, keeping a single successor - the one that stays in the loop. I.e.2, any branch that resides in an inner-loop and exits it (early or via latch) to a destination inside top loop, is represented by a branch recipe. The code here however avoids connecting a block that early exits from an inner loop to its exit block destination, if it leaves the inner loop - but may reside in the top loop. Unclear if such early exits from inner loops are supported (by native).

The latch of an inner loop ends with a branch that is covered by a BranchOnCond recipe, and is connected to both its header and exit, so that dominance info would identify the loop (although a loop header VPBB's could be marked as such), and the CFG remains connected.

The latch of the top loop originally has its header and exit block as successors. The latter was wrapped in a VPIRBB when VPlan was constructed, but will be connected to the latch/region later via middle block, so connecting them here is avoided. Should we avoid recreating it needlessly as Successor0 or Successor1? Alternatively, should the exit block be recorded in BB2VPBB at the beginning of buildPlainCFG(), as done with entry block? And connect the top loop's latch to the exit block, mirroring IR CFG, and adjust this connection later when middle block is introduced? Similar to how a region of an inner loop is connected to its preheader and exit instead of the header and latch, when converting to HCFG.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is somewhat confusing: can LoopForBB be null, given that BB traverses all (and only) blocks of TheLoop?

It cannot be null, I removed the check.

Initial VPlan-construction should also include connecting the exit blocks of the top-level loop eventually. I have a follow-up patch to do so, as it is probably best tackled separately.

if (!LoopForBB->contains(IRSucc0)) {
VPBB->setOneSuccessor(Successor1);
continue;
Expand All @@ -456,21 +379,16 @@ void PlainCFGBuilder::buildPlainCFG(
// corresponding VPlan operands.
fixHeaderPhis();

VPBlockUtils::connectBlocks(Plan.getEntry(),
getOrCreateVPBB(TheLoop->getHeader()));
Plan.getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
Plan.getEntry()->setPlan(&Plan);

for (const auto &[IRBB, VPB] : BB2VPBB)
VPB2IRBB[VPB] = IRBB;

LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
}

void VPlanHCFGBuilder::buildPlainCFG() {
PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
PCFGBuilder.buildPlainCFG(VPB2IRBB);
Comment on lines 365 to 367
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems even more redundant now to have both VPlanHCFGBuilder and PCFGBuilder, both with their buildPlainCFG().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I left it as-is to avoid unneccessary changes. Will move the code to VPlanConstruction.cpp as follow-up, removing VPlanHCFGBuilder.{h,cpp}.

}

// Public interface to build a H-CFG.
void VPlanHCFGBuilder::buildHierarchicalCFG() {
// Build Top Region enclosing the plain CFG.
buildPlainCFG();
LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
}
8 changes: 2 additions & 6 deletions llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ namespace llvm {

class Loop;
class LoopInfo;
class VPRegionBlock;
class VPlan;
class VPlanTestIRBase;
class VPBlockBase;
Expand All @@ -54,15 +53,12 @@ class VPlanHCFGBuilder {
/// created for a input IR basic block.
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;

/// Build plain CFG for TheLoop and connects it to Plan's entry.
void buildPlainCFG();

public:
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
VPlanCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)

but better be consistent with VPlanHCFGBuilder.h file name, i.e., do so when moving to VPlanConstruction?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I left it as-is to avoid unneccessary changes. Will move the code to VPlanConstruction.cpp as follow-up, removing VPlanHCFGBuilder.{h,cpp}.

: TheLoop(Lp), LI(LI), Plan(P) {}

/// Build H-CFG for TheLoop and update Plan accordingly.
void buildHierarchicalCFG();
/// Build plain CFG for TheLoop and connects it to Plan's entry.
void buildPlainCFG();

/// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
/// there is no such corresponding block.
Expand Down
21 changes: 9 additions & 12 deletions llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
@arr = external global [8 x [8 x i64]], align 16

define void @foo(i64 %n) {
; CHECK: VPlan 'HCFGBuilder: Plain CFG
; CHECK: VPlan 'Plain CFG
; CHECK-NEXT: {
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
Expand All @@ -19,17 +19,14 @@ define void @foo(i64 %n) {
; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
; CHECK-NEXT: Successor(s): inner
; CHECK-EMPTY:
; CHECK-NEXT: <x1> inner: {
; CHECK-NEXT: inner:
; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): outer.latch
; CHECK-NEXT: inner:
; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
; CHECK-NEXT: Successor(s): outer.latch, inner
; CHECK-EMPTY:
; CHECK-NEXT: outer.latch:
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; Verify that the stress testing flag for the VPlan H-CFG builder works as
; expected with and without enabling the VPlan H-CFG Verifier.

; CHECK: VPlan 'HCFGBuilder: Plain CFG
; CHECK: VPlan 'Plain CFG

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

Expand Down
Loading