[VPlan] Update final IV exit value via VPlan. #112147

Conversation
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed 971caa3 to 8ecee6b
Force-pushed 8ecee6b to 9b6fa7c
@llvm/pr-subscribers-backend-risc-v

Author: Florian Hahn (fhahn)

Changes

Model updating IV users directly in VPlan, replacing fixupIVUsers. This completes the transition towards modeling all live-outs directly in VPlan.

Depends on #110004.

Patch is 21.85 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/112147.diff

5 Files Affected:
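As a rough illustration of the exit values involved (a hypothetical loop, not taken from this patch or its tests): an LCSSA exit phi may use either the post-increment value or the induction phi itself, and the two need different values when control arrives from the middle block.

; Hypothetical example. %iv.next.lcssa uses the last value of the IV
; (the post-increment); %iv.lcssa uses the penultimate value (the phi).
define i64 @iv_escape(ptr %A, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep = getelementptr inbounds i64, ptr %A, i64 %iv
  store i64 %iv, ptr %gep
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n
  br i1 %ec, label %exit, label %loop

exit:
  ; When arriving from the middle block, should see the vector-trip-count-based end value.
  %iv.next.lcssa = phi i64 [ %iv.next, %loop ]
  ; When arriving from the middle block, should see the end value minus one step ("ind.escape").
  %iv.lcssa = phi i64 [ %iv, %loop ]
  ret i64 %iv.lcssa
}

With this patch, these exit values are computed by VPlan recipes (see addInductionEndValue below) instead of being patched in afterwards by fixupIVUsers.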
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1975df3cacbcae..e95797591bb297 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -542,11 +542,6 @@ class InnerLoopVectorizer {
protected:
friend class LoopVectorizationPlanner;
- /// Set up the values of the IVs correctly when exiting the vector loop.
- virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
- Value *VectorTripCount, BasicBlock *MiddleBlock,
- VPTransformState &State);
-
/// Iteratively sink the scalarized operands of a predicated instruction into
/// the block that was created for it.
void sinkScalarOperands(Instruction *PredInst);
@@ -775,10 +770,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
void printDebugTracesAtStart() override;
void printDebugTracesAtEnd() override;
-
- void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
- Value *VectorTripCount, BasicBlock *MiddleBlock,
- VPTransformState &State) override {};
};
// A specialized derived class of inner loop vectorizer that performs
@@ -2751,97 +2742,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
return LoopVectorPreHeader;
}
-// Fix up external users of the induction variable. At this point, we are
-// in LCSSA form, with all external PHIs that use the IV having one input value,
-// coming from the remainder loop. We need those PHIs to also have a correct
-// value for the IV when arriving directly from the middle block.
-void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
- const InductionDescriptor &II,
- Value *VectorTripCount,
- BasicBlock *MiddleBlock,
- VPTransformState &State) {
- // There are two kinds of external IV usages - those that use the value
- // computed in the last iteration (the PHI) and those that use the penultimate
- // value (the value that feeds into the phi from the loop latch).
- // We allow both, but they, obviously, have different values.
-
- DenseMap<Value *, Value *> MissingVals;
-
- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
- OrigLoop->getLoopPreheader()))
- ->getIncomingValueForBlock(MiddleBlock);
-
- // An external user of the last iteration's value should see the value that
- // the remainder loop uses to initialize its own IV.
- Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
- for (User *U : PostInc->users()) {
- Instruction *UI = cast<Instruction>(U);
- if (!OrigLoop->contains(UI)) {
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
- MissingVals[UI] = EndValue;
- }
- }
-
- // An external user of the penultimate value need to see EndValue - Step.
- // The simplest way to get this is to recompute it from the constituent SCEVs,
- // that is Start + (Step * (CRD - 1)).
- for (User *U : OrigPhi->users()) {
- auto *UI = cast<Instruction>(U);
- if (!OrigLoop->contains(UI)) {
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
- IRBuilder<> B(MiddleBlock->getTerminator());
-
- // Fast-math-flags propagate from the original induction instruction.
- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
- B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
-
- VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
- assert(StepVPV && "step must have been expanded during VPlan execution");
- Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
- : State.get(StepVPV, VPLane(0));
- Value *Escape = nullptr;
- if (EndValue->getType()->isIntegerTy())
- Escape = B.CreateSub(EndValue, Step);
- else if (EndValue->getType()->isPointerTy())
- Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
- else {
- assert(EndValue->getType()->isFloatingPointTy() &&
- "Unexpected induction type");
- Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
- Instruction::FAdd
- ? Instruction::FSub
- : Instruction::FAdd,
- EndValue, Step);
- }
- Escape->setName("ind.escape");
- MissingVals[UI] = Escape;
- }
- }
-
- assert((MissingVals.empty() ||
- all_of(MissingVals,
- [MiddleBlock, this](const std::pair<Value *, Value *> &P) {
- return all_of(
- predecessors(cast<Instruction>(P.first)->getParent()),
- [MiddleBlock, this](BasicBlock *Pred) {
- return Pred == MiddleBlock ||
- Pred == OrigLoop->getLoopLatch();
- });
- })) &&
- "Expected escaping values from latch/middle.block only");
-
- for (auto &I : MissingVals) {
- PHINode *PHI = cast<PHINode>(I.first);
- // One corner case we have to handle is two IVs "chasing" each-other,
- // that is %IV2 = phi [...], [ %IV1, %latch ]
- // In this case, if IV1 has an external use, we need to avoid adding both
- // "last value of IV1" and "penultimate value of IV2". So, verify that we
- // don't already have an incoming value for the middle block.
- if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
- PHI->addIncoming(I.second, MiddleBlock);
- }
-}
-
namespace {
struct CSEDenseMapInfo {
@@ -2986,24 +2886,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
for (PHINode &PN : Exit->phis())
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
- if (Cost->requiresScalarEpilogue(VF.isVector())) {
- // No edge from the middle block to the unique exit block has been inserted
- // and there is nothing to fix from vector loop; phis should have incoming
- // from scalar loop only.
- } else {
- // TODO: Check in VPlan to see if IV users need fixing instead of checking
- // the cost model.
-
- // If we inserted an edge from the middle block to the unique exit block,
- // update uses outside the loop (phis) to account for the newly inserted
- // edge.
-
- // Fix-up external users of the induction variables.
- for (const auto &Entry : Legal->getInductionVars())
- fixupIVUsers(Entry.first, Entry.second,
- getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
- }
-
for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);
@@ -8857,11 +8739,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
/// the end value of the induction.
-static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
- VPBuilder &VectorPHBuilder,
- VPBuilder &ScalarPHBuilder,
- VPTypeAnalysis &TypeInfo,
- VPValue *VectorTC) {
+static VPValue *addResumePhiRecipeForInduction(
+ VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
+ DenseMap<VPValue *, VPValue *> &EndValues) {
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
// Truncated wide inductions resume from the last lane of their vector value
// in the last vector iteration which is handled elsewhere.
@@ -8886,6 +8767,7 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
ScalarTypeOfWideIV);
}
+ EndValues[WideIV] = EndValue;
auto *ResumePhiRecipe =
ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
WideIV->getDebugLoc(), "bc.resume.val");
@@ -8895,7 +8777,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
/// Create resume phis in the scalar preheader for first-order recurrences,
/// reductions and inductions, and update the VPIRInstructions wrapping the
/// original phis in the scalar header.
-static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
+static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
+ Loop *OrigLoop,
+ DenseMap<VPValue *, VPValue *> &EndValues) {
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
auto *ScalarPH = Plan.getScalarPreheader();
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8915,7 +8799,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
- &Plan.getVectorTripCount())) {
+ &Plan.getVectorTripCount(), EndValues)) {
ScalarPhiIRI->addOperand(ResumePhi);
continue;
}
@@ -8949,9 +8833,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
// modeled explicitly yet and won't be included. Those are un-truncated
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
// increments.
-static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
- Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
- const MapVector<PHINode *, InductionDescriptor> &Inductions) {
+static SetVector<VPIRInstruction *>
+collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
+ VPlan &Plan) {
auto *MiddleVPBB = Plan.getMiddleBlock();
SetVector<VPIRInstruction *> ExitUsersToFix;
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -8976,18 +8860,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
// Exit values for inductions are computed and updated outside of VPlan
// and independent of induction recipes.
// TODO: Compute induction exit values in VPlan.
- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
- isa<VPWidenPointerInductionRecipe>(V) ||
- (isa<Instruction>(IncomingValue) &&
- OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
- any_of(IncomingValue->users(), [&Inductions](User *U) {
- auto *P = dyn_cast<PHINode>(U);
- return P && Inductions.contains(P);
- }))) {
- if (ExitVPBB->getSinglePredecessor() == MiddleVPBB)
- continue;
- }
ExitUsersToFix.insert(ExitIRI);
ExitIRI->addOperand(V);
}
@@ -8996,17 +8868,86 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
return ExitUsersToFix;
}
+/// If \p Incoming is a user of a non-truncated induction, create recipes to
+/// compute the final value and update the user \p ExitIRI.
+static bool addInductionEndValue(
+ VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming,
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
+ DenseMap<VPValue *, VPValue *> &EndValues, VPTypeAnalysis &TypeInfo) {
+ if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
+ !cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst()) ||
+ isa<VPWidenPointerInductionRecipe>(Incoming) ||
+ (isa<Instruction>(Incoming->getUnderlyingValue()) &&
+ any_of(cast<Instruction>(Incoming->getUnderlyingValue())->users(),
+ [&Inductions](User *U) {
+ auto *P = dyn_cast<PHINode>(U);
+ return P && Inductions.contains(P);
+ }))) {
+ VPValue *IV;
+ if (auto *WideIV =
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()))
+ IV = WideIV;
+ else if (auto *WideIV =
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()
+ ->getOperand(0)
+ ->getDefiningRecipe()))
+ IV = WideIV;
+ else
+ IV = Incoming->getDefiningRecipe()->getOperand(1);
+ // Skip phi nodes already updated. This can be the case if 2 induction
+ // phis chase each other.
+ VPValue *EndValue = EndValues[IV];
+ if (any_of(cast<VPRecipeBase>(Incoming->getDefiningRecipe())->operands(),
+ IsaPred<VPWidenIntOrFpInductionRecipe,
+ VPWidenPointerInductionRecipe>)) {
+ ExitIRI->setOperand(0, EndValue);
+ return true;
+ }
+
+ VPBuilder B(Plan.getMiddleBlock()->getTerminator());
+ VPValue *Escape = nullptr;
+ auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe());
+ VPValue *Step = WideIV->getStepValue();
+ Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
+ if (ScalarTy->isIntegerTy())
+ Escape =
+ B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
+ else if (ScalarTy->isPointerTy())
+ Escape = B.createPtrAdd(
+ EndValue,
+ B.createNaryOp(Instruction::Sub,
+ {Plan.getOrAddLiveIn(ConstantInt::get(
+ Step->getLiveInIRValue()->getType(), 0)),
+ Step}),
+ {}, "ind.escape");
+ else if (ScalarTy->isFloatingPointTy()) {
+ const auto &ID = WideIV->getInductionDescriptor();
+ Escape = B.createNaryOp(
+ ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
+ ? Instruction::FSub
+ : Instruction::FAdd,
+ {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
+ } else {
+ llvm_unreachable("all possible induction types must be handled");
+ }
+ ExitIRI->setOperand(0, Escape);
+ return true;
+ }
+ return false;
+}
// Add exit values to \p Plan. Extracts are added for each entry in \p
// ExitUsersToFix if needed and their operands are updated. Returns true if all
// exit users can be handled, otherwise return false.
-static bool
-addUsersInExitBlocks(VPlan &Plan,
- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
+static bool addUsersInExitBlocks(
+ VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
+ DenseMap<VPValue *, VPValue *> &EndValues) {
if (ExitUsersToFix.empty())
return true;
auto *MiddleVPBB = Plan.getMiddleBlock();
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
// Introduce extract for exiting values and update the VPIRInstructions
// modeling the corresponding LCSSA phis.
@@ -9022,11 +8963,16 @@ addUsersInExitBlocks(VPlan &Plan,
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
return false;
+ VPValue *Incoming = ExitIRI->getOperand(0);
+ if (addInductionEndValue(Plan, ExitIRI, Incoming, Inductions, EndValues,
+ TypeInfo))
+ continue;
+
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
{Op, Plan.getOrAddLiveIn(ConstantInt::get(
IntegerType::get(Ctx, 32), 1))});
- ExitIRI->setOperand(Idx, Ext);
+ ExitIRI->setOperand(0, Ext);
}
}
return true;
@@ -9307,11 +9253,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
VPlanTransforms::handleUncountableEarlyExit(
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
}
- addScalarResumePhis(RecipeBuilder, *Plan);
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
+ DenseMap<VPValue *, VPValue *> EndValues;
+ addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
+ SetVector<VPIRInstruction *> ExitUsersToFix =
+ collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
- if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
+ if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, Legal->getInductionVars(),
+ EndValues)) {
reportVectorizationFailure(
"Some exit values in loop with uncountable exit not supported yet",
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
@@ -9438,7 +9386,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
}
- addScalarResumePhis(RecipeBuilder, *Plan);
+ DenseMap<VPValue *, VPValue *> EndValues;
+ addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 9a082921d4f7f2..d763a9a605294b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -315,16 +315,20 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
LastLane = 0;
}
- auto *LastInst = cast<Instruction>(get(Def, LastLane));
- // Set the insert point after the last scalarized instruction or after the
- // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
- // will directly follow the scalar definitions.
+ auto *LastDef = get(Def, LastLane);
auto OldIP = Builder.saveIP();
- auto NewIP =
- isa<PHINode>(LastInst)
- ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
- : std::next(BasicBlock::iterator(LastInst));
- Builder.SetInsertPoint(&*NewIP);
+ if (auto *LastInst = dyn_cast<Instruction>(LastDef)) {
+ // TODO: Remove once VPDerivedIVReicpe can be simplified, which requires
+ // vector trip count being modeled in VPlan.
+ // Set the insert point after the last scalarized instruction or after the
+ // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
+ // will directly follow the scalar definitions.
+ auto NewIP =
+ isa<PHINode>(LastInst)
+ ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
+ : std::next(BasicBlock::iterator(LastInst));
+ Builder.SetInsertPoint(&*NewIP);
+ }
// However, if we are vectorizing, we need to construct the vector values.
// If the value is known to be uniform after vectorization, we can just
@@ -339,7 +343,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
} else {
// Initialize packing with insertelements to start from undef.
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
- Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
+ Value *Undef = PoisonValue::get(VectorType::get(LastDef->getType(), VF));
set(Def, Undef);
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
packScalarIntoVectorValue(Def, Lane);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 404202b7f31304..dd0d2dfdd2e01c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1422,6 +1422,11 @@ class VPIRInstruction : public VPRecipeBase {
"Op must be an operand of the recipe");
return true;
}
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of ...
[truncated]
Ping :) Now updated after landing #112145, should be ready for review
      VPValue *Incoming = ExitIRI->getOperand(0);
      if (addInductionEndValue(Plan, ExitIRI, Incoming, Inductions, EndValues,
                               TypeInfo))
        continue;
Would using Idx instead of 0 be necessary for supporting two exiting blocks?
Yes, restored using Idx, although the case currently cannot happen yet.
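For illustration, a hypothetical loop (not taken from this patch or its tests) where two exiting edges reach the same exit block: the LCSSA phi then has one incoming value per edge, the VPIRInstruction wrapping it carries one operand per predecessor, and a hard-coded operand index 0 would only update one of them.

define i64 @two_exits(ptr %A, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
  %gep = getelementptr inbounds i64, ptr %A, i64 %iv
  %v = load i64, ptr %gep
  %early = icmp eq i64 %v, 0      ; uncountable early exit
  br i1 %early, label %exit, label %latch

latch:
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n  ; countable latch exit
  br i1 %ec, label %exit, label %loop

exit:
  ; One incoming value per exiting edge.
  %iv.lcssa = phi i64 [ %iv, %loop ], [ %iv, %latch ]
  ret i64 %iv.lcssa
}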
      // Exit values for inductions are computed and updated outside of VPlan
      // and independent of induction recipes.
      // TODO: Compute induction exit values in VPlan.
      if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
Is the comment stale after the patch?
Gone thanks
@@ -8857,11 +8739,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
/// the end value of the induction.
Also explain recording \p EndValues.
Updated, thanks!
@@ -8895,7 +8777,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
/// Create resume phis in the scalar preheader for first-order recurrences,
/// reductions and inductions, and update the VPIRInstructions wrapping the
/// original phis in the scalar header.

Also explain recording \p EndValues.

Done thanks
@@ -8915,7 +8799,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
  if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
          WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
          &Plan.getVectorTripCount())) {
          &Plan.getVectorTripCount(), EndValues)) {

Suggested change:
          &Plan.getVectorTripCount())) {
          EndValues[WideIVR] = first operand of ResumePhi
?

Done thanks
@@ -8895,7 +8777,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
/// Create resume phis in the scalar preheader for first-order recurrences,
/// reductions and inductions, and update the VPIRInstructions wrapping the
/// original phis in the scalar header.
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
                                Loop *OrigLoop,

Why/Is OrigLoop needed?

Not needed in the latest version, removed, thanks
@@ -230,7 +230,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK: LV: Loop does not require scalar epilogue

Does this CHECK (and the one below) need to be removed?

?

This message got printed multiple times from requiresScalarEpilogue, and we removed a caller, hence the test update.
IsaPred<VPWidenIntOrFpInductionRecipe,
        VPWidenPointerInductionRecipe>)) {

Suggested change:
IsaPred<VPWidenInductionRecipe>)) {
VPBuilder B(Plan.getMiddleBlock()->getTerminator());
VPValue *Escape = nullptr;
auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe());

nit: define IV as such to begin with?
if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
     !cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst()) ||
    isa<VPWidenPointerInductionRecipe>(Incoming) ||
    (isa<Instruction>(Incoming->getUnderlyingValue()) &&
     any_of(cast<Instruction>(Incoming->getUnderlyingValue())->users(),
            [&Inductions](User *U) {
              auto *P = dyn_cast<PHINode>(U);
              return P && Inductions.contains(P);
            }))) {

Better early exit?
Better as one precondition here rather than distributed below?

Completely reworked the code to add the induction end values as VPlan transforms, splitting off the logic to retrieve the wide IV to df4a615, including determining if incoming is an optimizable IV use completely in VPlan.
if (ScalarTy->isIntegerTy())
  Escape =
      B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
else if (ScalarTy->isPointerTy())
  Escape = B.createPtrAdd(
      EndValue,
      B.createNaryOp(Instruction::Sub,
                     {Plan.getOrAddLiveIn(ConstantInt::get(
                          Step->getLiveInIRValue()->getType(), 0)),
                      Step}),
      {}, "ind.escape");
else if (ScalarTy->isFloatingPointTy()) {

(independent)

Suggested change:
if (ScalarTy->isIntegerTy()) {
  Escape =
      B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
} else if (ScalarTy->isPointerTy()) {
  auto *Zero = Plan.getOrAddLiveIn(
      ConstantInt::get(Step->getLiveInIRValue()->getType(), 0));
  Escape = B.createPtrAdd(
      EndValue, B.createNaryOp(Instruction::Sub, {Zero, Step}), {}, "ind.escape");
} else if (ScalarTy->isFloatingPointTy()) {

could also add a "Negate" opcode, analogous to "Not".
                     Step}),
      {}, "ind.escape");
else if (ScalarTy->isFloatingPointTy()) {
  const auto &ID = WideIV->getInductionDescriptor();

(independent)

Suggested change:
const auto &ID = WideIV->getInductionDescriptor();
auto ReversedOpcode = ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
                          ? Instruction::FSub
                          : Instruction::FAdd;
?
Model updating IV users directly in VPlan, replace fixupIVUsers. Depends on llvm#110004, llvm#109975 and llvm#112145.
Check the VPlan directly to determine if a VPValue is an optimizable IV or IV use instead of checking the underlying IR instructions. Split off from #112147. This refactoring enables moving IV end value creation from the legacy fixupIVUsers to a VPlan-based transform. There is one case we now won't optimize: IVs with subtracts and non-constant steps. But as this is a minor optimization and doesn't impact correctness, the benefits of performing the check in VPlan should outweigh the missed case.
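As a rough illustration only (not the final code in this PR), the VPlan-side check for the untruncated-wide-IV half can take the following shape; the names follow the recipes and the getOptimizableIVOf helper quoted in the review below, and the increment-matching half ("WideIV + Step") is elided here:

// Sketch: return the wide induction if VPV is an untruncated wide IV;
// a complete version would also accept "WideIV + Step" increments.
static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) {
  auto *WideIV =
      dyn_cast_or_null<VPWidenInductionRecipe>(VPV->getDefiningRecipe());
  if (!WideIV)
    return nullptr; // Not defined by a wide induction recipe.
  // Truncated int/FP inductions are not handled by this optimization.
  auto *IntOrFpIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
  return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
}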
9b6fa7c
to
a2ba00a
Compare
Reworked the code to add the induction end values as VPlan transforms
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
                              {Op, Plan.getOrAddLiveIn(ConstantInt::get(
                                       IntegerType::get(Ctx, 32), 1))});
ExitIRI->setOperand(Idx, Ext);
ExitIRI->setOperand(0, Ext);

Should still use Idx for completeness, restored, thanks.
@@ -9022,11 +8963,16 @@ addUsersInExitBlocks(VPlan &Plan,
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
  return false;

VPValue *Incoming = ExitIRI->getOperand(0);

Updated the code to a VPlan transform, to optimize the exit users.
@@ -9060,12 +8878,14 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
// exit users can be handled, otherwise return false.
static bool
addUsersInExitBlocks(VPlan &Plan,
                     const SetVector<VPIRInstruction *> &ExitUsersToFix) {
                     const SetVector<VPIRInstruction *> &ExitUsersToFix,
                     DenseMap<VPValue *, VPValue *> &IVEndValues) {

The IVEndValues parameter seems to be unused here.

Should be gone now, thanks
Hi @fhahn, whilst attempting to rebase #120567 on top of this PR I noticed failures when building the LLVM test suite. Apologies in advance if you're already aware! Just for reference, the error I see is:
Legalize extract-from-ends using uniform VPReplicateRecipe of wide inductions to use regular VPReplicateRecipe, so the correct end value is available. Fixes #121745.
Ping :)

Hi @fhahn, whilst attempting to rebase #120567 on top of this PR I noticed failures when building the LLVM test suite. Apologies in advance if you're already aware! Just for reference, the error I see is:
clang: /home/davshe01/regression/llvm/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp:3138: virtual void llvm::VPWidenPointerInductionRecipe::execute(llvm::VPTransformState&): Assertion ` !onlyScalarsGenerated(State.VF.isScalable()) && "Recipe should have been replaced"' failed.

Thanks, that should be fixed in the latest version by removing dead recipes after optimizing the exit users.
    return true;
  }
  return false;
}
// Add exit values to \p Plan. Extracts are added for each entry in \p
// ExitUsersToFix if needed and their operands are updated. Returns true if all
// exit users can be handled, otherwise return false.

Gone now
          WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
          &Plan.getVectorTripCount())) {
    IVEndValues[WideIVR] = ResumePhi->getOperand(0);

nit: good to assert we know what we're taking the first operand of.

Suggested change:
    assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi && "Expected a ResumePhi");
    IVEndValues[WideIVR] = ResumePhi->getOperand(0);

Done thanks
DenseMap<VPValue *, VPValue *> IVEndValues;
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);

Are these handled when building native VPlan? Otherwise better pass null to avoid recording, or define as return value - to be ignored here, instead of collecting them uselessly?

They are not used at the moment; worth adjusting the API and complicating the implementation to avoid recording for the native path? Added a comment.
@@ -1416,6 +1416,11 @@ class VPIRInstruction : public VPRecipeBase {
           "Op must be an operand of the recipe");
    return true;
  }
  bool onlyFirstLaneUsed(const VPValue *Op) const override {

nit:

Suggested change:
  bool onlyFirstLaneUsed(const VPValue *Op) const override {

Done thanks!
if (isa<VPWidenPointerInductionRecipe>(WideIV) ||
    !cast<VPWidenIntOrFpInductionRecipe>(WideIV)->getTruncInst())
  return WideIV;
return nullptr;

nit, can alternatively do:

Suggested change:
auto *IntOrFpIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;

consistent with the last return at the bottom.

Done thanks
/// Return a wide IV, if \p VPV is an optimizable wide IV or wide IV use. That
/// is, if \p VPV is either an untruncated wide induction, or if it increments a
/// wide induction by its step.
static VPWidenInductionRecipe *isOptimizableIVOrUse(VPValue *VPV) {

Suggested change:
static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) {
?

Done thanks
// The simplest way to get this is to recompute it from the constituent SCEVs,
// that is Start + (Step * (CRD - 1)).

This part is obsolete.
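For reference, the relationship the old comment alluded to can be checked with a tiny standalone model (made-up numbers, no LLVM types): the end value the scalar loop resumes from equals Start + Step * VectorTripCount, which matches stepping the scalar IV that many times; a pre-increment exit user would need one step back from it.

#include <cassert>
#include <cstdint>

int main() {
  // Made-up induction: trip count 1003, IV = 5 + 3*i, VF*UF = 8.
  int64_t Start = 5, Step = 3, TripCount = 1003, VFxUF = 8;
  int64_t VectorTripCount = TripCount - TripCount % VFxUF; // 1000

  // What the scalar IV would hold after the vector loop's iterations.
  int64_t IV = Start;
  for (int64_t I = 0; I < VectorTripCount; ++I)
    IV += Step;

  // The precomputed end value used for the resume phi / exit users.
  int64_t EndValue = Start + Step * VectorTripCount;
  assert(IV == EndValue);
  return 0;
}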
// Truncated wide inductions resume from the last lane of their vector value
// in the last vector iteration which is handled elsewhere.

Regarding this being "handled elsewhere", should all exiting IV values be handled first by extracting the last lane, whether resumed by the scalar loop or used in the exit block? Then, as a separate (conceptually optional) subsequent optimization, replace such extractions by precomputing the end values, for both scalar loop and exit block users.

Sounds good, this effectively means updating all scalar VPIRInstructions wrapping phis together, using the same logic. Might be good to first land this patch and then consolidate the phi handling, to reduce the diff?

Reducing diff here and following up is fine. Worth leaving behind a TODO somewhere.

Added the TODO to the place where we call addResumePhiRecipeForInduction.
@@ -9397,7 +9216,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
    VPlanTransforms::handleUncountableEarlyExit(
        *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
  }
  addScalarResumePhis(RecipeBuilder, *Plan);
  DenseMap<VPValue *, VPValue *> IVEndValues;
  addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);

Exiting IV values can be optimized for both kinds of out-of-loop users: those reaching the scalar loop and those in the exit block. Rather than taking care of the former here and of the latter some 80+ LOC below, passing an IVEndValues map of the end-values created here to be reused there, it would be better to optimize both out-of-loop users together.
Scalar loop resume values use the post-increment IV, i.e., the "end value" of the IV, whereas exit block users may use the post or pre increment IV. However, both types of exit block users also use the "end value" of the IV - the latter with an additional step taken backwards.
So how about doing the following: start with "legalizing" all exiting IV users to extract the last lane, as part of initial VPlan construction. Then, as part of optimizing VPlan, canonicalize exit-block users who use the pre-increment IV to use the post-increment IV instead, followed by taking the additional step back (at the exit block). Finally, replace every extraction from a post-increment IV feeding out-of-loop users with a precomputed end-value. WDYT, worth doing so, now or as follow-up?

Sounds good; as per the comment above, it would probably be good to do as a follow-up, as it would further increase the diff if pulled in here?

ok. Worth leaving behind a TODO somewhere.

Added where addResumePhiRecipeForInduction is called, thanks
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3

Now missing this opportunity to optimize into an end value?

Yes, currently we don't try to optimize if there are multiple exits, although we could add some extra code.

Good to note in the commit message that some opportunities may be lost, and leave some TODO to regain them?

Will do, thanks!
; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]

Note that fast is used and required - optimizing an exiting FP IV value by precomputing it using multiplication could change its value, compared to the value obtained by extracting the last lane - which may also differ from the original exiting value due to vectorizing the FP IV by VF=2...

Ack
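For context on why fast is required, a minimal standalone illustration (made-up step and iteration count, plain C++): recomputing an FP induction's end value with a single multiply generally differs from stepping it repeatedly, so the rewrite is only sound under reassociation-permitting fast-math flags.

#include <cstdio>

int main() {
  // Made-up FP induction: fiv += 0.1f, taken 1000 times.
  float Step = 0.1f;
  int N = 1000;

  float Stepped = 0.0f; // value obtained by taking N individual steps
  for (int I = 0; I < N; ++I)
    Stepped += Step;

  float Precomputed = Step * (float)N; // end value recomputed with one fmul

  // Without fast-math these are not interchangeable; here they differ visibly.
  std::printf("stepped=%.7f precomputed=%.7f\n", Stepped, Precomputed);
  return 0;
}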
LGTM, adding final comments.
@@ -9675,7 +9498,9 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
    auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
    RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
  }
  addScalarResumePhis(RecipeBuilder, *Plan);
  DenseMap<VPValue *, VPValue *> IVEndValues;
  // IVEndValues are not used yet in the native path.

nit:

Suggested change:
  // TODO: IVEndValues are not used yet in the native path, to optimize exit values.
auto OldIP = Builder.saveIP();
// TODO: Remove once VPDerivedRecipe can be simplified, which requires
// vector trip count being modeled in VPlan.
// Set the insert point after the last scalarized instruction or after the
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
// will directly follow the scalar definitions.

Retain current code?

Suggested change:
// Set the insert point after the last scalarized instruction or after the
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
// will directly follow the scalar definitions.
auto OldIP = Builder.saveIP();

Done thanks
  return nullptr;
}

// Check if VPV is an optimizable induction increment.

Hmm, this is indeed confusing; should fixHeaderPhis() also hook up the operand across the backedge of induction header phi recipes, as it does for reductions and FORs? Thereby supporting its VPHeaderPHIRecipe::getBackedgeValue() API. Admittedly, this may contradict #82270(?)
if (!WideIV)
  continue;
VPValue *EndValue = EndValues.lookup(WideIV);
if (!EndValue)

I.e., are all optimizable WideIV's expected to have their end values precomputed?
VPValue *Escape = nullptr;
VPValue *Step = WideIV->getStepValue();
Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
if (ScalarTy->isIntegerTy()) {
  Escape =
      B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
} else if (ScalarTy->isPointerTy()) {
  auto *Zero = Plan.getOrAddLiveIn(
      ConstantInt::get(Step->getLiveInIRValue()->getType(), 0));
  Escape = B.createPtrAdd(EndValue,
                          B.createNaryOp(Instruction::Sub, {Zero, Step}),
                          {}, "ind.escape");
} else if (ScalarTy->isFloatingPointTy()) {
  const auto &ID = WideIV->getInductionDescriptor();
  Escape = B.createNaryOp(
      ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
          ? Instruction::FSub
          : Instruction::FAdd,
      {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
} else {
  llvm_unreachable("all possible induction types must be handled");
}

Part of TODO?
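The quoted branches compute the escape value for exit users of the pre-increment IV by stepping back once from the precomputed end value. A minimal standalone sketch of that arithmetic for the integer and pointer cases (plain C++, made-up start/step/trip-count values; the FP case mirrors it with FSub/FAdd and is covered by the fast-math note above):

#include <cassert>
#include <cstdint>

int main() {
  // Made-up induction: start 5, step 3, 1000 vector-loop iterations.
  int64_t Start = 5, Step = 3, VectorTripCount = 1000;
  int64_t EndValue = Start + Step * VectorTripCount; // post-increment end value

  // Integer case: users of the pre-increment IV see one step back.
  int64_t Escape = EndValue - Step;
  assert(Escape == Start + Step * (VectorTripCount - 1));

  // Pointer case: ptradd(EndValue, 0 - Step), i.e. the same step back in bytes.
  char Buffer[4096];
  char *PtrStart = Buffer, *PtrEnd = PtrStart + Step * VectorTripCount;
  char *PtrEscape = PtrEnd + (0 - Step);
  assert(PtrEscape == PtrStart + Step * (VectorTripCount - 1));
  return 0;
}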
@@ -137,6 +137,10 @@ struct VPlanTransforms {

  /// Lower abstract recipes to concrete ones, that can be codegen'd.
  static void convertToConcreteRecipes(VPlan &Plan);

  static void

Something like

Suggested change:
  /// If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them precomputed end values instead, possibly taken one step backwards.
  static void

Done, thanks!
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/11/builds/11248 Here is the relevant piece of the build log for reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/168/builds/7650 Here is the relevant piece of the build log for reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/76/builds/6212 Here is the relevant piece of the build log for reference:
This reverts commit c2d15ac. Causes build failures on PPC stage2 & fuchsia bots https://lab.llvm.org/buildbot/#/builders/168/builds/7650 https://lab.llvm.org/buildbot/#/builders/11/builds/11248
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/55/builds/5753 Here is the relevant piece of the build log for reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/13088 Here is the relevant piece of the build log for reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/24/builds/4344 Here is the relevant piece of the build log for reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/85/builds/4497 Here is the relevant piece of the build log for reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/164/builds/6571 Here is the relevant piece of the build log for reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/25/builds/5748 Here is the relevant piece of the build log for reference:
…terVector. In preparation for re-landing llvm/llvm-project#112147, also consider VPDerivedIVRecipe and VPInstructions with binary opcodes and PtrAdd with all uniform operands as uniform themselves. Effectively NFC, but will be exercised once #112147 re-lands.
Tests for crash caused by initial version of #112147.
This reverts the revert commit 58326f1. The build failure in sanitizer stage2 builds has been fixed with 0d39fe6. Original commit message: Model updating IV users directly in VPlan, replace fixupIVUsers. Now simple extracts are created for all phis in the exit block during initial VPlan construction. A later VPlan transform (optimizeInductionExitUsers) replaces extracts of inductions with their pre-computed values if possible. This completes the transition towards modeling all live-outs directly in VPlan. There are a few follow-ups:
* emit extracts initially also for resume phis, and optimize them together with IV exit users
* support for VPlans with multiple exits in optimizeInductionExitUsers.
Depends on #110004, #109975 and #112145.
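A standalone model of the two-stage scheme described above (made-up trip count and VF, no LLVM types): the legalized form extracts the exit value from the last lane of the wide IV, and optimizeInductionExitUsers replaces that extract with the precomputed end value, which yields the same result for a single-exit countable loop.

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // Made-up loop: IV = i, trip count 103, VF = 4 (single vector part).
  const int64_t TripCount = 103, VF = 4;
  const int64_t VectorTripCount = TripCount - TripCount % VF; // 100

  std::vector<int64_t> WideIV(VF);
  for (int64_t Base = 0; Base + VF <= TripCount; Base += VF)
    for (int64_t L = 0; L < VF; ++L)
      WideIV[L] = Base + L; // last vector iteration leaves lanes {96,97,98,99}

  // Stage 1 (legalized form): exit value = extract of the last lane.
  int64_t FromExtract = WideIV[VF - 1];

  // Stage 2 (optimizeInductionExitUsers): precompute the same value instead.
  int64_t FromEndValue = VectorTripCount - 1; // end value, one step back
  assert(FromExtract == FromEndValue);
  return 0;
}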
…an e removed. Tests for crash caused by initial version of llvm/llvm-project#112147.
Follow-up as discussed when using VPInstruction::ResumePhi for all resume values (#112147). This patch explicitly adds incoming values for each predecessor in VPlan. This simplifies codegen and allows transformations adjusting the predecessors of blocks with NFC modulo incoming block order in phis.
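A hypothetical stand-in for what a resume phi with explicit per-predecessor incoming values models (block names and numbers are made up): coming from the middle block the scalar loop resumes at the vector trip count, while coming straight from the original preheader it starts from the original start value.

#include <cassert>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in: a phi with explicit (predecessor, value) pairs.
struct ResumePhi {
  std::vector<std::pair<std::string, int64_t>> Incoming;
  int64_t valueFor(const std::string &Pred) const {
    for (const auto &[Block, Value] : Incoming)
      if (Block == Pred)
        return Value;
    assert(false && "no incoming value for predecessor");
    return 0;
  }
};

int main() {
  // Made-up numbers: IV starts at 0, vector trip count 96.
  ResumePhi Resume{{{"middle.block", 96}, {"ph", 0}}};
  assert(Resume.valueFor("middle.block") == 96); // resuming after the vector loop
  assert(Resume.valueFor("ph") == 0);            // vector loop was skipped
  return 0;
}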
Model updating IV users directly in VPlan, replace fixupIVUsers.
This completes the transition towards modeling all live-outs directly in VPlan.
Depends on #110004, #109975 and #112145.