-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[VPlan] Delay adding canonical IV increment. #82270
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
2ae1d69
470e0ae
311f105
fbf1e06
53b812d
304f11b
4afb1f0
6af2b50
e1cd5d9
9770a1e
62e78e9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -7647,8 +7647,19 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |||||
(!VectorizingEpilogue && !ExpandedSCEVs)) && | ||||||
"expanded SCEVs to reuse can only be used during epilogue vectorization"); | ||||||
|
||||||
// TODO: Move to VPlan transform stage once the transition to the VPlan-based | ||||||
// cost model is complete for better cost estimates. | ||||||
bool IVUpdateMayOverflow = | ||||||
!isIndvarOverflowCheckKnownFalse(&CM, BestVF, BestUF); | ||||||
TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow); | ||||||
bool WithoutRuntimeCheck = | ||||||
Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck; | ||||||
// Use NUW for the induction increment if we proved that it won't overflow in | ||||||
// the vector loop or when not folding the tail. In the latter case, we know | ||||||
// that the canonical induction increment will not overflow as the vector trip | ||||||
// count is >= increment and a multiple of the increment. | ||||||
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None; | ||||||
// TODO: Move transforms to VPlan transform stage once the transition to the | ||||||
// VPlan-based cost model is complete for better cost estimates. | ||||||
VPlanTransforms::convertCanonicalIV(BestVPlan, HasNUW, WithoutRuntimeCheck); | ||||||
VPlanTransforms::unrollByUF(BestVPlan, BestUF, | ||||||
OrigLoop->getHeader()->getContext()); | ||||||
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE); | ||||||
|
@@ -8902,29 +8913,26 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, | |||||
} | ||||||
} | ||||||
|
||||||
// Add the necessary canonical IV and branch recipes required to control the | ||||||
// loop. | ||||||
static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, | ||||||
DebugLoc DL) { | ||||||
// Add the required canonical IV along with its loop branch, but w/o its | ||||||
// increment - which is introduced later. | ||||||
static void addCanonicalIV(VPlan &Plan, Type *IdxTy, DebugLoc DL) { | ||||||
Value *StartIdx = ConstantInt::get(IdxTy, 0); | ||||||
auto *StartV = Plan.getOrAddLiveIn(StartIdx); | ||||||
|
||||||
// Add a VPCanonicalIVPHIRecipe starting at 0 to the header. | ||||||
// TODO: Introduce a separate scalar phi recipe that can be used for codegen, | ||||||
// turning VPCanonicalIVPHIRecipe into an 'abstract' recipe which cannot be | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. VPCanonicalIVPHIRecipe, as a phi recipe in the header block, and BranchOnCount as a recipe in the latch, seem to retain their individual concrete semantics, even if the increment is introduced later between them. |
||||||
// executed directly. | ||||||
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL); | ||||||
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); | ||||||
VPBasicBlock *Header = TopRegion->getEntryBasicBlock(); | ||||||
Header->insert(CanonicalIVPHI, Header->begin()); | ||||||
|
||||||
VPBuilder Builder(TopRegion->getExitingBasicBlock()); | ||||||
// Add a VPInstruction to increment the scalar canonical IV by VF * UF. | ||||||
auto *CanonicalIVIncrement = Builder.createOverflowingOp( | ||||||
Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {HasNUW, false}, DL, | ||||||
"index.next"); | ||||||
CanonicalIVPHI->addOperand(CanonicalIVIncrement); | ||||||
|
||||||
// Add the BranchOnCount VPInstruction to the latch. | ||||||
VPBuilder Builder(TopRegion->getExitingBasicBlock()); | ||||||
// TODO: introduce branch-on-count during VPlan final (pre-codegen) lowering. | ||||||
Builder.createNaryOp(VPInstruction::BranchOnCount, | ||||||
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); | ||||||
{CanonicalIVPHI, &Plan.getVectorTripCount()}, DL); | ||||||
Comment on lines
+8933
to
+8935
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about having BranchOnCount also be responsible for bumping the IV, at least initially? (Inspired by PowerPC's bdnz instruction; can call it BranchOnIncrementCount.) It could then feed back the canonical IV Phi across the back-edge, and possibly be split into a separate Add later to simplify code-gen. |
||||||
} | ||||||
|
||||||
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the | ||||||
|
@@ -9230,22 +9238,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |||||
PSE, RequiresScalarEpilogueCheck, | ||||||
CM.foldTailByMasking(), OrigLoop); | ||||||
|
||||||
// Don't use getDecisionAndClampRange here, because we don't know the UF | ||||||
// so this function is better to be conservative, rather than to split | ||||||
// it up into different VPlans. | ||||||
// TODO: Consider using getDecisionAndClampRange here to split up VPlans. | ||||||
bool IVUpdateMayOverflow = false; | ||||||
for (ElementCount VF : Range) | ||||||
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF); | ||||||
|
||||||
DebugLoc DL = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); | ||||||
TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow); | ||||||
// Use NUW for the induction increment if we proved that it won't overflow in | ||||||
// the vector loop or when not folding the tail. In the later case, we know | ||||||
// that the canonical induction increment will not overflow as the vector trip | ||||||
// count is >= increment and a multiple of the increment. | ||||||
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None; | ||||||
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL); | ||||||
addCanonicalIV(*Plan, Legal->getWidestInductionType(), DL); | ||||||
|
||||||
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE, | ||||||
Builder); | ||||||
|
@@ -9471,6 +9465,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |||||
if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder)) | ||||||
return nullptr; | ||||||
|
||||||
// Don't use getDecisionAndClampRange here, because we don't know the UF | ||||||
// so this function is better to be conservative, rather than to split | ||||||
// it up into different VPlans. | ||||||
// TODO: Consider using getDecisionAndClampRange here to split up VPlans. | ||||||
bool IVUpdateMayOverflow = false; | ||||||
for (ElementCount VF : Range) | ||||||
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF); | ||||||
|
||||||
TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow); | ||||||
Comment on lines
+9468
to
+9476
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is moved here from earlier in tryToBuildVPlanWithVPRecipes(), so this information is still computed conservatively at this stage. |
||||||
if (useActiveLaneMask(Style)) { | ||||||
// TODO: Move checks to VPlanTransforms::addActiveLaneMask once | ||||||
// TailFoldingStyle is visible there. | ||||||
|
@@ -9516,11 +9519,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { | |||||
Plan->getVectorLoopRegion()->getExitingBasicBlock()->getTerminator(); | ||||||
Term->eraseFromParent(); | ||||||
|
||||||
// Tail folding is not supported for outer loops, so the induction increment | ||||||
// is guaranteed to not wrap. | ||||||
bool HasNUW = true; | ||||||
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, | ||||||
DebugLoc()); | ||||||
addCanonicalIV(*Plan, Legal->getWidestInductionType(), DebugLoc()); | ||||||
|
||||||
// Collect mapping of IR header phis to header phi recipes, to be used in | ||||||
// addScalarResumePhis. | ||||||
|
@@ -10272,7 +10271,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, | |||||
isa<VPScalarCastRecipe>(U) || | ||||||
isa<VPDerivedIVRecipe>(U) || | ||||||
cast<VPInstruction>(U)->getOpcode() == | ||||||
Instruction::Add; | ||||||
VPInstruction::BranchOnCount; | ||||||
}) && | ||||||
"the canonical IV should only be used by its increment or " | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
"ScalarIVSteps when resetting the start value"); | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3239,7 +3239,8 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { | |
|
||
VPCanonicalIVPHIRecipe *clone() override { | ||
auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc()); | ||
R->addOperand(getBackedgeValue()); | ||
if (getNumOperands() == 2) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Seems reasonable to expect every header phi recipe to have two operands (always) - implementing getStartValue() and getBackedgeValue() of VPHeaderPHIRecipe? |
||
R->addOperand(getBackedgeValue()); | ||
return R; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Setting HasNUW here during VPlan execution is more accurate because of the BestUF --> IVUpdateMayOverflow --> tail-fold-style --> HasNUW dependence (i.e., along with "setUF()" or "unrollByUF"), but is there a better way to set this earlier during VPlan planning, possibly splitting VF ranges - and UF ranges?