Skip to content

Commit 09a29fc

Browse files
authored
[VPlan] Don't collect live-ins in collectUsersInExitBlocks. (NFC) (#123819)
Live-ins don't need to be handled, other than adding them to the exit phi recipe. Do that early and assert that any other exit value is defined in the vector loop region. This should enable simply skipping other exit values that do not need further fixing, e.g. when the exit value from the early exit is handled directly in handleUncountableEarlyExit. PR: #123819
1 parent 749443a commit 09a29fc

File tree

6 files changed

+119
-52
lines changed

6 files changed

+119
-52
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 31 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -9033,7 +9033,6 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
90339033
static SetVector<VPIRInstruction *>
90349034
collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
90359035
VPlan &Plan) {
9036-
auto *MiddleVPBB = Plan.getMiddleBlock();
90379036
SetVector<VPIRInstruction *> ExitUsersToFix;
90389037
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
90399038
for (VPRecipeBase &R : *ExitVPBB) {
@@ -9043,33 +9042,33 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
90439042
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
90449043
if (!ExitPhi)
90459044
break;
9046-
for (VPBlockBase *PredVPBB : ExitVPBB->getPredecessors()) {
9047-
BasicBlock *ExitingBB = OrigLoop->getLoopLatch();
9048-
if (PredVPBB != MiddleVPBB) {
9049-
SmallVector<BasicBlock *> ExitingBlocks;
9050-
OrigLoop->getExitingBlocks(ExitingBlocks);
9051-
assert(ExitingBlocks.size() == 2 && "only support 2 exiting blocks");
9052-
ExitingBB = ExitingBB == ExitingBlocks[0] ? ExitingBlocks[1]
9053-
: ExitingBlocks[0];
9054-
}
9055-
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
9056-
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
9057-
ExitUsersToFix.insert(ExitIRI);
9058-
ExitIRI->addOperand(V);
9045+
if (ExitVPBB->getSinglePredecessor() != Plan.getMiddleBlock()) {
9046+
assert(ExitIRI->getNumOperands() ==
9047+
ExitVPBB->getPredecessors().size() &&
9048+
"early-exit must update exit values on construction");
9049+
continue;
90599050
}
9051+
BasicBlock *ExitingBB = OrigLoop->getLoopLatch();
9052+
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
9053+
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
9054+
ExitIRI->addOperand(V);
9055+
if (V->isLiveIn())
9056+
continue;
9057+
assert(V->getDefiningRecipe()->getParent()->getEnclosingLoopRegion() &&
9058+
"Only recipes defined inside a region should need fixing.");
9059+
ExitUsersToFix.insert(ExitIRI);
90609060
}
90619061
}
90629062
return ExitUsersToFix;
90639063
}
90649064

90659065
// Add exit values to \p Plan. Extracts are added for each entry in \p
9066-
// ExitUsersToFix if needed and their operands are updated. Returns true if all
9067-
// exit users can be handled, otherwise return false.
9068-
static bool
9066+
// ExitUsersToFix if needed and their operands are updated.
9067+
static void
90699068
addUsersInExitBlocks(VPlan &Plan,
90709069
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
90719070
if (ExitUsersToFix.empty())
9072-
return true;
9071+
return;
90739072

90749073
auto *MiddleVPBB = Plan.getMiddleBlock();
90759074
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
@@ -9078,25 +9077,12 @@ addUsersInExitBlocks(VPlan &Plan,
90789077
// Introduce extract for exiting values and update the VPIRInstructions
90799078
// modeling the corresponding LCSSA phis.
90809079
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
9081-
for (const auto &[Idx, Op] : enumerate(ExitIRI->operands())) {
9082-
// Pass live-in values used by exit phis directly through to their users
9083-
// in the exit block.
9084-
if (Op->isLiveIn())
9085-
continue;
9086-
9087-
// Currently only live-ins can be used by exit values from blocks not
9088-
// exiting via the vector latch through to the middle block.
9089-
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
9090-
return false;
9091-
9092-
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
9093-
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
9094-
{Op, Plan.getOrAddLiveIn(ConstantInt::get(
9095-
IntegerType::get(Ctx, 32), 1))});
9096-
ExitIRI->setOperand(Idx, Ext);
9097-
}
9080+
assert(ExitIRI->getNumOperands() == 1 &&
9081+
ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
9082+
"exit values from early exits must be fixed when branch to "
9083+
"early-exit is added");
9084+
ExitIRI->extractLastLaneOfOperand(B);
90989085
}
9099-
return true;
91009086
}
91019087

91029088
/// Handle users in the exit block for first order reductions in the original
@@ -9392,20 +9378,21 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93929378

93939379
if (auto *UncountableExitingBlock =
93949380
Legal->getUncountableEarlyExitingBlock()) {
9395-
VPlanTransforms::handleUncountableEarlyExit(
9396-
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
9381+
if (!VPlanTransforms::handleUncountableEarlyExit(
9382+
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock,
9383+
RecipeBuilder)) {
9384+
reportVectorizationFailure(
9385+
"Some exit values in loop with uncountable exit not supported yet",
9386+
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
9387+
return nullptr;
9388+
}
93979389
}
93989390
DenseMap<VPValue *, VPValue *> IVEndValues;
93999391
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
94009392
SetVector<VPIRInstruction *> ExitUsersToFix =
94019393
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
94029394
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9403-
if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
9404-
reportVectorizationFailure(
9405-
"Some exit values in loop with uncountable exit not supported yet",
9406-
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
9407-
return nullptr;
9408-
}
9395+
addUsersInExitBlocks(*Plan, ExitUsersToFix);
94099396

94109397
// ---------------------------------------------------------------------------
94119398
// Transform initial VPlan: Apply previously taken decisions, in order, to

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ class RecurrenceDescriptor;
6060
class SCEV;
6161
class Type;
6262
class VPBasicBlock;
63+
class VPBuilder;
6364
class VPRegionBlock;
6465
class VPlan;
6566
class VPReplicateRecipe;
@@ -1422,6 +1423,11 @@ class VPIRInstruction : public VPRecipeBase {
14221423
"Op must be an operand of the recipe");
14231424
return true;
14241425
}
1426+
1427+
/// Update the recipe's single operand to the last lane of the operand using \p
1428+
/// Builder. Must only be used for single operand VPIRInstructions wrapping a
1429+
/// PHINode.
1430+
void extractLastLaneOfOperand(VPBuilder &Builder);
14251431
};
14261432

14271433
/// VPWidenRecipe is a recipe for producing a widened instruction using the

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
///
1212
//===----------------------------------------------------------------------===//
1313

14+
#include "LoopVectorizationPlanner.h"
1415
#include "VPlan.h"
1516
#include "VPlanAnalysis.h"
1617
#include "VPlanPatternMatch.h"
@@ -937,6 +938,22 @@ InstructionCost VPIRInstruction::computeCost(ElementCount VF,
937938
return 0;
938939
}
939940

941+
void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) {
942+
assert(isa<PHINode>(getInstruction()) &&
943+
"can only add exiting operands to phi nodes");
944+
assert(getNumOperands() == 1 && "must have a single operand");
945+
VPValue *Exiting = getOperand(0);
946+
if (!Exiting->isLiveIn()) {
947+
LLVMContext &Ctx = getInstruction().getContext();
948+
auto &Plan = *getParent()->getPlan();
949+
Exiting = Builder.createNaryOp(
950+
VPInstruction::ExtractFromEnd,
951+
{Exiting,
952+
Plan.getOrAddLiveIn(ConstantInt::get(IntegerType::get(Ctx, 32), 1))});
953+
}
954+
setOperand(0, Exiting);
955+
}
956+
940957
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
941958
void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
942959
VPSlotTracker &SlotTracker) const {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2062,7 +2062,7 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
20622062
}
20632063
}
20642064

2065-
void VPlanTransforms::handleUncountableEarlyExit(
2065+
bool VPlanTransforms::handleUncountableEarlyExit(
20662066
VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,
20672067
BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) {
20682068
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
@@ -2103,7 +2103,32 @@ void VPlanTransforms::handleUncountableEarlyExit(
21032103
VPBlockUtils::connectBlocks(NewMiddle, VPEarlyExitBlock);
21042104
NewMiddle->swapSuccessors();
21052105

2106+
// Update the exit phis in the early exit block.
21062107
VPBuilder MiddleBuilder(NewMiddle);
2108+
for (VPRecipeBase &R : *VPEarlyExitBlock) {
2109+
auto *ExitIRI = cast<VPIRInstruction>(&R);
2110+
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
2111+
if (!ExitPhi)
2112+
break;
2113+
2114+
VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn(
2115+
ExitPhi->getIncomingValueForBlock(UncountableExitingBlock));
2116+
// The incoming value from the early exit must be a live-in for now.
2117+
if (!IncomingFromEarlyExit->isLiveIn())
2118+
return false;
2119+
2120+
if (OrigLoop->getUniqueExitBlock()) {
2121+
// If there's a unique exit block, VPEarlyExitBlock has 2 predecessors
2122+
// (MiddleVPBB and NewMiddle). Add the incoming value from MiddleVPBB
2123+
// which is coming from the original latch.
2124+
VPValue *IncomingFromLatch = RecipeBuilder.getVPValueOrAddLiveIn(
2125+
ExitPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
2126+
ExitIRI->addOperand(IncomingFromLatch);
2127+
ExitIRI->extractLastLaneOfOperand(MiddleBuilder);
2128+
}
2129+
// Add the incoming value from the early exit.
2130+
ExitIRI->addOperand(IncomingFromEarlyExit);
2131+
}
21072132
MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
21082133

21092134
// Replace the condition controlling the non-early exit from the vector loop
@@ -2119,4 +2144,5 @@ void VPlanTransforms::handleUncountableEarlyExit(
21192144
Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
21202145
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
21212146
LatchExitingBranch->eraseFromParent();
2147+
return true;
21222148
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ struct VPlanTransforms {
130130
/// exit conditions
131131
/// * splitting the original middle block to branch to the early exit block
132132
/// if taken.
133-
static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
133+
static bool handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
134134
Loop *OrigLoop,
135135
BasicBlock *UncountableExitingBlock,
136136
VPRecipeBuilder &RecipeBuilder);

llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,21 +217,50 @@ define i64 @same_exit_block_pre_inc_use2() {
217217
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
218218
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
219219
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
220+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
221+
; CHECK: vector.ph:
220222
; CHECK-NEXT: br label [[LOOP:%.*]]
223+
; CHECK: vector.body:
224+
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
225+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[LOOP]] ]
226+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
227+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
228+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
229+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
230+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
231+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
232+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
233+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
234+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
235+
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
236+
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
237+
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
238+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
239+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
240+
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
241+
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
242+
; CHECK: middle.split:
243+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
244+
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
245+
; CHECK: middle.block:
246+
; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
247+
; CHECK: scalar.ph:
248+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
249+
; CHECK-NEXT: br label [[LOOP1:%.*]]
221250
; CHECK: loop:
222-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
251+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
223252
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
224253
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
225254
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
226255
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
227256
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
228-
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
257+
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
229258
; CHECK: loop.inc:
230259
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
231260
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
232-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
261+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
233262
; CHECK: loop.end:
234-
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ]
263+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP1]] ], [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 67, [[MIDDLE_SPLIT]] ]
235264
; CHECK-NEXT: ret i64 [[RETVAL]]
236265
;
237266
entry:
@@ -548,7 +577,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
548577
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
549578
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
550579
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
551-
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
580+
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
552581
; CHECK: middle.split:
553582
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
554583
; CHECK: middle.block:
@@ -568,7 +597,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
568597
; CHECK: loop.inc:
569598
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
570599
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
571-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
600+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]]
572601
; CHECK: loop.early.exit:
573602
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP1]] ], [ 67, [[MIDDLE_SPLIT]] ]
574603
; CHECK-NEXT: ret i64 [[RETVAL1]]
@@ -1029,4 +1058,6 @@ attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
10291058
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
10301059
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
10311060
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1061+
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1062+
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
10321063
;.

0 commit comments

Comments
 (0)