Skip to content

Commit 3d30a34

Browse files
committed
!fixup move early exit exit value handling to VPlanTransform.
1 parent d0e00e8 commit 3d30a34

File tree

4 files changed

+100
-44
lines changed

4 files changed

+100
-44
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 29 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -9031,7 +9031,6 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
90319031
static SetVector<VPIRInstruction *>
90329032
collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
90339033
VPlan &Plan) {
9034-
auto *MiddleVPBB = Plan.getMiddleBlock();
90359034
SetVector<VPIRInstruction *> ExitUsersToFix;
90369035
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
90379036
for (VPRecipeBase &R : *ExitVPBB) {
@@ -9041,37 +9040,33 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
90419040
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
90429041
if (!ExitPhi)
90439042
break;
9044-
for (VPBlockBase *PredVPBB : ExitVPBB->getPredecessors()) {
9045-
BasicBlock *ExitingBB = OrigLoop->getLoopLatch();
9046-
if (PredVPBB != MiddleVPBB) {
9047-
SmallVector<BasicBlock *> ExitingBlocks;
9048-
OrigLoop->getExitingBlocks(ExitingBlocks);
9049-
assert(ExitingBlocks.size() == 2 && "only support 2 exiting blocks");
9050-
ExitingBB = ExitingBB == ExitingBlocks[0] ? ExitingBlocks[1]
9051-
: ExitingBlocks[0];
9052-
}
9053-
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
9054-
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
9055-
ExitIRI->addOperand(V);
9056-
if (V->isLiveIn())
9057-
continue;
9058-
assert(V->getDefiningRecipe()->getParent()->getEnclosingLoopRegion() &&
9059-
"Only recipes defined inside a region should need fixing.");
9060-
ExitUsersToFix.insert(ExitIRI);
9043+
if (ExitVPBB->getSinglePredecessor() != Plan.getMiddleBlock()) {
9044+
assert(ExitIRI->getNumOperands() ==
9045+
ExitVPBB->getPredecessors().size() &&
9046+
"early-exit must update exit values on construction");
9047+
continue;
90619048
}
9049+
BasicBlock *ExitingBB = OrigLoop->getLoopLatch();
9050+
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
9051+
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
9052+
ExitIRI->addOperand(V);
9053+
if (V->isLiveIn())
9054+
continue;
9055+
assert(V->getDefiningRecipe()->getParent()->getEnclosingLoopRegion() &&
9056+
"Only recipes defined inside a region should need fixing.");
9057+
ExitUsersToFix.insert(ExitIRI);
90629058
}
90639059
}
90649060
return ExitUsersToFix;
90659061
}
90669062

90679063
// Add exit values to \p Plan. Extracts are added for each entry in \p
9068-
// ExitUsersToFix if needed and their operands are updated. Returns true if all
9069-
// exit users can be handled, otherwise return false.
9070-
static bool
9064+
// ExitUsersToFix if needed and their operands are updated.
9065+
static void
90719066
addUsersInExitBlocks(VPlan &Plan,
90729067
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
90739068
if (ExitUsersToFix.empty())
9074-
return true;
9069+
return;
90759070

90769071
auto *MiddleVPBB = Plan.getMiddleBlock();
90779072
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
@@ -9081,19 +9076,16 @@ addUsersInExitBlocks(VPlan &Plan,
90819076
// modeling the corresponding LCSSA phis.
90829077
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
90839078
for (const auto &[Idx, Op] : enumerate(ExitIRI->operands())) {
9084-
// Currently only live-ins can be used by exit values from blocks not
9085-
// exiting via the vector latch through to the middle block.
9086-
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
9087-
return false;
9088-
9079+
assert(ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
9080+
"exit values from early exits must be fixed when branch to "
9081+
"early-exit is added");
90899082
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
90909083
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
90919084
{Op, Plan.getOrAddLiveIn(ConstantInt::get(
90929085
IntegerType::get(Ctx, 32), 1))});
90939086
ExitIRI->setOperand(Idx, Ext);
90949087
}
90959088
}
9096-
return true;
90979089
}
90989090

90999091
/// Handle users in the exit block for first order reductions in the original
@@ -9389,20 +9381,21 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93899381

93909382
if (auto *UncountableExitingBlock =
93919383
Legal->getUncountableEarlyExitingBlock()) {
9392-
VPlanTransforms::handleUncountableEarlyExit(
9393-
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
9384+
if (!VPlanTransforms::handleUncountableEarlyExit(
9385+
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock,
9386+
RecipeBuilder)) {
9387+
reportVectorizationFailure(
9388+
"Some exit values in loop with uncountable exit not supported yet",
9389+
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
9390+
return nullptr;
9391+
}
93949392
}
93959393
DenseMap<VPValue *, VPValue *> IVEndValues;
93969394
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
93979395
SetVector<VPIRInstruction *> ExitUsersToFix =
93989396
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
93999397
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9400-
if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
9401-
reportVectorizationFailure(
9402-
"Some exit values in loop with uncountable exit not supported yet",
9403-
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
9404-
return nullptr;
9405-
}
9398+
addUsersInExitBlocks(*Plan, ExitUsersToFix);
94069399

94079400
// ---------------------------------------------------------------------------
94089401
// Transform initial VPlan: Apply previously taken decisions, in order, to

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2062,7 +2062,7 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
20622062
}
20632063
}
20642064

2065-
void VPlanTransforms::handleUncountableEarlyExit(
2065+
bool VPlanTransforms::handleUncountableEarlyExit(
20662066
VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,
20672067
BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) {
20682068
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
@@ -2103,7 +2103,38 @@ void VPlanTransforms::handleUncountableEarlyExit(
21032103
VPBlockUtils::connectBlocks(NewMiddle, VPEarlyExitBlock);
21042104
NewMiddle->swapSuccessors();
21052105

2106+
// Update the exit phis in the early exit block.
21062107
VPBuilder MiddleBuilder(NewMiddle);
2108+
for (VPRecipeBase &R : *VPEarlyExitBlock) {
2109+
auto *ExitIRI = cast<VPIRInstruction>(&R);
2110+
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
2111+
if (!ExitPhi)
2112+
break;
2113+
2114+
VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn(
2115+
ExitPhi->getIncomingValueForBlock(UncountableExitingBlock));
2116+
// The incoming value from the early exit must be a live-in for now.
2117+
if (!IncomingFromEarlyExit->isLiveIn())
2118+
return false;
2119+
2120+
if (OrigLoop->getUniqueExitBlock()) {
2121+
// If there's a unique exit block, VPEarlyExitBlock has 2 predecessors
2122+
// (MiddleVPBB and NewMiddle). Add the incoming value from MiddleVPBB
2123+
// which is coming from the original latch.
2124+
VPValue *IncomingFromLatch = RecipeBuilder.getVPValueOrAddLiveIn(
2125+
ExitPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
2126+
if (!IncomingFromLatch->isLiveIn()) {
2127+
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
2128+
IncomingFromLatch = MiddleBuilder.createNaryOp(
2129+
VPInstruction::ExtractFromEnd,
2130+
{IncomingFromLatch, Plan.getOrAddLiveIn(ConstantInt::get(
2131+
IntegerType::get(Ctx, 32), 1))});
2132+
}
2133+
ExitIRI->addOperand(IncomingFromLatch);
2134+
}
2135+
// Add the incoming value from the early exit.
2136+
ExitIRI->addOperand(IncomingFromEarlyExit);
2137+
}
21072138
MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
21082139

21092140
// Replace the condition controlling the non-early exit from the vector loop
@@ -2119,4 +2150,5 @@ void VPlanTransforms::handleUncountableEarlyExit(
21192150
Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
21202151
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
21212152
LatchExitingBranch->eraseFromParent();
2153+
return true;
21222154
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ struct VPlanTransforms {
130130
/// exit conditions
131131
/// * splitting the original middle block to branch to the early exit block
132132
/// if taken.
133-
static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
133+
static bool handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
134134
Loop *OrigLoop,
135135
BasicBlock *UncountableExitingBlock,
136136
VPRecipeBuilder &RecipeBuilder);

llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,21 +217,50 @@ define i64 @same_exit_block_pre_inc_use2() {
217217
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
218218
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
219219
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
220+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
221+
; CHECK: vector.ph:
220222
; CHECK-NEXT: br label [[LOOP:%.*]]
223+
; CHECK: vector.body:
224+
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
225+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[LOOP]] ]
226+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
227+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
228+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
229+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
230+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
231+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
232+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
233+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
234+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
235+
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
236+
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
237+
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
238+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
239+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
240+
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
241+
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
242+
; CHECK: middle.split:
243+
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
244+
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
245+
; CHECK: middle.block:
246+
; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
247+
; CHECK: scalar.ph:
248+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
249+
; CHECK-NEXT: br label [[LOOP1:%.*]]
221250
; CHECK: loop:
222-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
251+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
223252
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
224253
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
225254
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
226255
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
227256
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
228-
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
257+
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
229258
; CHECK: loop.inc:
230259
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
231260
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
232-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
261+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
233262
; CHECK: loop.end:
234-
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ]
263+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP1]] ], [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 67, [[MIDDLE_SPLIT]] ]
235264
; CHECK-NEXT: ret i64 [[RETVAL]]
236265
;
237266
entry:
@@ -548,7 +577,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
548577
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
549578
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
550579
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
551-
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
580+
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
552581
; CHECK: middle.split:
553582
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
554583
; CHECK: middle.block:
@@ -568,7 +597,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
568597
; CHECK: loop.inc:
569598
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
570599
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
571-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
600+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]]
572601
; CHECK: loop.early.exit:
573602
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP1]] ], [ 67, [[MIDDLE_SPLIT]] ]
574603
; CHECK-NEXT: ret i64 [[RETVAL1]]
@@ -1029,4 +1058,6 @@ attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
10291058
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
10301059
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
10311060
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1061+
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1062+
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
10321063
;.

0 commit comments

Comments
 (0)