Skip to content

Commit 3d96ba3

Browse files
committed
[LoopVectorize] Enable vectorisation of early exit loops with live-outs
This work feeds part of PR #88385, and adds support for vectorising loops with uncountable early exits and outside users of loop-defined variables. When calculating the final value from an uncountable early exit we need to calculate the vector lane that triggered the exit, and hence determine the value at the point we exited.

All code for calculating the last value when exiting the loop early now lives in a new vector.early.exit block, which sits between the middle.split block and the original exit block. Doing this required the following fix:

* The VPlan verifier incorrectly assumed that the block containing a definition always dominates the block of the user. That's not true if you can arrive at the use block from multiple incoming blocks. This is possible for early exit loops where both the early exit and the latch jump to the same block.

I've added a new ExtractFirstActive VPInstruction that extracts the first active lane of a vector, i.e. the lane of the vector predicate that triggered the exit.

NOTE: The IR generated for dealing with live-outs from early exit loops is unoptimised, as opposed to normal loops. This inevitably leads to poor quality code, but this can be fixed up later.
1 parent 2625510 commit 3d96ba3

16 files changed

+1019
-153
lines changed

llvm/docs/Vectorizers.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -405,9 +405,11 @@ Early Exit Vectorization
405405
When vectorizing a loop with a single early exit, the loop blocks following the
406406
early exit are predicated and the vector loop will always exit via the latch.
407407
If the early exit has been taken, the vector loop's successor block
408-
(``middle.split`` below) branches to the early exit block. Otherwise
409-
``middle.block`` selects between the exit block from the latch or the scalar
410-
remainder loop.
408+
(``middle.split`` below) branches to the early exit block via an intermediate
409+
block (``vector.early.exit`` below). This intermediate block is responsible for
410+
calculating any exit values of loop-defined variables that are used in the
411+
early exit block. Otherwise, ``middle.block`` selects between the exit block
412+
from the latch or the scalar remainder loop.
411413

412414
.. image:: vplan-early-exit.png
413415

llvm/docs/vplan-early-exit.dot

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,23 +19,27 @@ compound=true
1919
"middle.split"
2020
]
2121
N4 -> N5 [ label=""]
22-
N4 -> N6 [ label=""]
22+
N4 -> N7 [ label=""]
2323
N5 [label =
24-
"early.exit"
24+
"vector.early.exit"
2525
]
26+
N5 -> N6 [ label=""]
2627
N6 [label =
27-
"middle.block"
28+
"early.exit"
2829
]
29-
N6 -> N9 [ label=""]
30-
N6 -> N7 [ label=""]
3130
N7 [label =
32-
"scalar.ph"
31+
"middle.block"
3332
]
33+
N7 -> N10 [ label=""]
3434
N7 -> N8 [ label=""]
3535
N8 [label =
36-
"loop.header"
36+
"scalar.ph"
3737
]
38+
N8 -> N9 [ label=""]
3839
N9 [label =
40+
"loop.header"
41+
]
42+
N10 [label =
3943
"latch.exit"
4044
]
4145
}

llvm/docs/vplan-early-exit.png

-54.4 KB
Loading

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9378,14 +9378,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93789378

93799379
if (auto *UncountableExitingBlock =
93809380
Legal->getUncountableEarlyExitingBlock()) {
9381-
if (!VPlanTransforms::handleUncountableEarlyExit(
9382-
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock,
9383-
RecipeBuilder)) {
9384-
reportVectorizationFailure(
9385-
"Some exit values in loop with uncountable exit not supported yet",
9386-
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
9387-
return nullptr;
9388-
}
9381+
VPlanTransforms::handleUncountableEarlyExit(
9382+
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
93899383
}
93909384
DenseMap<VPValue *, VPValue *> IVEndValues;
93919385
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -500,8 +500,15 @@ void VPBasicBlock::execute(VPTransformState *State) {
500500
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
501501
// Register NewBB in its loop. In innermost loops it's the same for all
502502
// BBs.
503-
if (State->CurrentParentLoop)
504-
State->CurrentParentLoop->addBasicBlockToLoop(NewBB, *State->LI);
503+
Loop *ParentLoop = State->CurrentParentLoop;
504+
// If this block has a sole successor that is an exit block then it needs
505+
// adding to the same parent loop as the exit block.
506+
VPBlockBase *SuccVPBB = getSingleSuccessor();
507+
if (SuccVPBB && State->Plan->isExitBlock(SuccVPBB))
508+
ParentLoop = State->LI->getLoopFor(
509+
cast<VPIRBasicBlock>(SuccVPBB)->getIRBasicBlock());
510+
if (ParentLoop)
511+
ParentLoop->addBasicBlockToLoop(NewBB, *State->LI);
505512
State->Builder.SetInsertPoint(Terminator);
506513

507514
State->CFG.PrevBB = NewBB;
@@ -949,6 +956,16 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
949956
}
950957
}
951958

959+
bool VPlan::isExitBlock(VPBlockBase *VPBB) {
960+
if (isa<VPIRBasicBlock>(VPBB) && VPBB->getNumSuccessors() == 0) {
961+
assert(is_contained(getExitBlocks(), VPBB) &&
962+
"Expected to find VPlan block in list of exit blocks!");
963+
return true;
964+
}
965+
966+
return false;
967+
}
968+
952969
/// Generate the code inside the preheader and body of the vectorized loop.
953970
/// Assumes a single pre-header basic-block was created for this. Introduce
954971
/// additional basic-blocks as needed, and fill them all.

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,6 +1223,9 @@ class VPInstruction : public VPRecipeWithIRFlags,
12231223
// Returns a scalar boolean value, which is true if any lane of its (only
12241224
// boolean) vector operand is true.
12251225
AnyOf,
1226+
// Extracts the first active lane of a vector, where the first operand is
1227+
// the vector to extract from, and the second operand is the predicate.
1228+
ExtractFirstActive,
12261229
};
12271230

12281231
private:
@@ -3964,6 +3967,9 @@ class VPlan {
39643967
/// of VPBlockShallowTraversalWrapper.
39653968
auto getExitBlocks();
39663969

3970+
/// Returns true if \p VPBB is an exit block.
3971+
bool isExitBlock(VPBlockBase *VPBB);
3972+
39673973
/// The trip count of the original loop.
39683974
VPValue *getTripCount() const {
39693975
assert(TripCount && "trip count needs to be set before accessing it");

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
7878
case VPInstruction::CanonicalIVIncrementForPart:
7979
case VPInstruction::AnyOf:
8080
return SetResultTyFromOp();
81+
case VPInstruction::ExtractFirstActive:
8182
case VPInstruction::ExtractFromEnd: {
8283
Type *BaseTy = inferScalarType(R->getOperand(0));
8384
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -697,14 +697,21 @@ Value *VPInstruction::generate(VPTransformState &State) {
697697
Value *A = State.get(getOperand(0));
698698
return Builder.CreateOrReduce(A);
699699
}
700-
700+
case VPInstruction::ExtractFirstActive: {
701+
Value *Vec = State.get(getOperand(0));
702+
Value *Mask = State.get(getOperand(1));
703+
Value *Ctz =
704+
Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask);
705+
return Builder.CreateExtractElement(Vec, Ctz);
706+
}
701707
default:
702708
llvm_unreachable("Unsupported opcode for instruction");
703709
}
704710
}
705711

706712
bool VPInstruction::isVectorToScalar() const {
707713
return getOpcode() == VPInstruction::ExtractFromEnd ||
714+
getOpcode() == VPInstruction::ExtractFirstActive ||
708715
getOpcode() == VPInstruction::ComputeReductionResult ||
709716
getOpcode() == VPInstruction::AnyOf;
710717
}
@@ -769,6 +776,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
769776
case VPInstruction::CalculateTripCountMinusVF:
770777
case VPInstruction::CanonicalIVIncrementForPart:
771778
case VPInstruction::ExtractFromEnd:
779+
case VPInstruction::ExtractFirstActive:
772780
case VPInstruction::FirstOrderRecurrenceSplice:
773781
case VPInstruction::LogicalAnd:
774782
case VPInstruction::Not:
@@ -888,6 +896,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
888896
case VPInstruction::AnyOf:
889897
O << "any-of";
890898
break;
899+
case VPInstruction::ExtractFirstActive:
900+
O << "extract-first-active";
901+
break;
891902
default:
892903
O << Instruction::getOpcodeName(getOpcode());
893904
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2062,7 +2062,7 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
20622062
}
20632063
}
20642064

2065-
bool VPlanTransforms::handleUncountableEarlyExit(
2065+
void VPlanTransforms::handleUncountableEarlyExit(
20662066
VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,
20672067
BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) {
20682068
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
@@ -2099,12 +2099,17 @@ bool VPlanTransforms::handleUncountableEarlyExit(
20992099
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
21002100

21012101
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
2102+
VPBasicBlock *VectorEarlyExitVPBB =
2103+
Plan.createVPBasicBlock("vector.early.exit");
21022104
VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
2103-
VPBlockUtils::connectBlocks(NewMiddle, VPEarlyExitBlock);
2105+
VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
21042106
NewMiddle->swapSuccessors();
21052107

2108+
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
2109+
21062110
// Update the exit phis in the early exit block.
21072111
VPBuilder MiddleBuilder(NewMiddle);
2112+
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
21082113
for (VPRecipeBase &R : *VPEarlyExitBlock) {
21092114
auto *ExitIRI = cast<VPIRInstruction>(&R);
21102115
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
@@ -2113,9 +2118,6 @@ bool VPlanTransforms::handleUncountableEarlyExit(
21132118

21142119
VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn(
21152120
ExitPhi->getIncomingValueForBlock(UncountableExitingBlock));
2116-
// The incoming value from the early exit must be a live-in for now.
2117-
if (!IncomingFromEarlyExit->isLiveIn())
2118-
return false;
21192121

21202122
if (OrigLoop->getUniqueExitBlock()) {
21212123
// If there's a unique exit block, VPEarlyExitBlock has 2 predecessors
@@ -2126,7 +2128,12 @@ bool VPlanTransforms::handleUncountableEarlyExit(
21262128
ExitIRI->addOperand(IncomingFromLatch);
21272129
ExitIRI->extractLastLaneOfOperand(MiddleBuilder);
21282130
}
2131+
21292132
// Add the incoming value from the early exit.
2133+
if (!IncomingFromEarlyExit->isLiveIn())
2134+
IncomingFromEarlyExit =
2135+
EarlyExitB.createNaryOp(VPInstruction::ExtractFirstActive,
2136+
{IncomingFromEarlyExit, EarlyExitTakenCond});
21302137
ExitIRI->addOperand(IncomingFromEarlyExit);
21312138
}
21322139
MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
@@ -2144,5 +2151,4 @@ bool VPlanTransforms::handleUncountableEarlyExit(
21442151
Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
21452152
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
21462153
LatchExitingBranch->eraseFromParent();
2147-
return true;
21482154
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ struct VPlanTransforms {
130130
/// exit conditions
131131
/// * splitting the original middle block to branch to the early exit block
132132
/// if taken.
133-
static bool handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
133+
static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
134134
Loop *OrigLoop,
135135
BasicBlock *UncountableExitingBlock,
136136
VPRecipeBuilder &RecipeBuilder);

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,11 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
222222
continue;
223223
}
224224

225-
if (!VPDT.dominates(VPBB, UI->getParent())) {
225+
// Now that we support vectorising loops with uncountable early exits
226+
// we can end up in situations where VPBB does not dominate the exit
227+
// block. Only do the check if the user is not in a VPIRBasicBlock.
228+
if (!isa<VPIRBasicBlock>(UI->getParent()) &&
229+
!VPDT.dominates(VPBB, UI->getParent())) {
226230
errs() << "Use before def!\n";
227231
return false;
228232
}

0 commit comments

Comments
 (0)