-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[VPlan] Introduce ResumePhi VPInstruction, use to create phi for FOR. #94760
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1ea4a7c
bd6fe32
9f53c00
f028ab3
94672b5
3716289
ae77a2a
d19016f
24133b7
5e61cb4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -599,10 +599,6 @@ class InnerLoopVectorizer { | |
BasicBlock *MiddleBlock, BasicBlock *VectorHeader, | ||
VPlan &Plan, VPTransformState &State); | ||
|
||
/// Create the phi node for the resume value of first order recurrences in the | ||
/// scalar preheader and update the users in the scalar loop. | ||
void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State); | ||
|
||
/// Iteratively sink the scalarized operands of a predicated instruction into | ||
/// the block that was created for it. | ||
void sinkScalarOperands(Instruction *PredInst); | ||
|
@@ -3286,19 +3282,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, | |
if (EnableVPlanNativePath) | ||
fixNonInductionPHIs(Plan, State); | ||
|
||
// At this point every instruction in the original loop is widened to a | ||
// vector form. Note that fixing reduction phis, as well as extracting the | ||
// exit and resume values for fixed-order recurrences are already modeled in | ||
// VPlan. All that remains to do here is to create a phi in the scalar | ||
// pre-header for each fixed-order recurrence resume value. | ||
// TODO: Also model creating phis in the scalar pre-header in VPlan. | ||
for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) { | ||
if (!Legal->isFixedOrderRecurrence(LO->getPhi())) | ||
continue; | ||
fixFixedOrderRecurrence(LO, State); | ||
Plan.removeLiveOut(LO->getPhi()); | ||
} | ||
|
||
// Forget the original basic block. | ||
PSE.getSE()->forgetLoop(OrigLoop); | ||
PSE.getSE()->forgetBlockAndLoopDispositions(); | ||
|
@@ -3335,10 +3318,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, | |
VectorLoop->getHeader(), Plan, State); | ||
} | ||
|
||
// Fix LCSSA phis not already fixed earlier. Extracts may need to be generated | ||
// in the exit block, so update the builder. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good to retain the first sentence:
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||
State.Builder.SetInsertPoint(State.CFG.ExitBB, | ||
State.CFG.ExitBB->getFirstNonPHIIt()); | ||
// Fix live-out phis not already fixed earlier. | ||
for (const auto &KV : Plan.getLiveOuts()) | ||
KV.second->fixPhi(Plan, State); | ||
|
||
|
@@ -3366,32 +3346,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, | |
VF.getKnownMinValue() * UF); | ||
} | ||
|
||
void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit (independent): is the second part of the following comment from above along with setting of insertion point still relevant?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved setting the insert point to VPLiveOut based on the predecessor. |
||
VPTransformState &State) { | ||
// Extract the last vector element in the middle block. This will be the | ||
// initial value for the recurrence when jumping to the scalar loop. | ||
VPValue *VPExtract = LO->getOperand(0); | ||
using namespace llvm::VPlanPatternMatch; | ||
assert(match(VPExtract, m_VPInstruction<VPInstruction::ExtractFromEnd>( | ||
m_VPValue(), m_VPValue())) && | ||
"FOR LiveOut expects to use an extract from end."); | ||
Value *ResumeScalarFOR = State.get(VPExtract, UF - 1, true); | ||
|
||
// Fix the initial value of the original recurrence in the scalar loop. | ||
PHINode *ScalarHeaderPhi = LO->getPhi(); | ||
auto *InitScalarFOR = | ||
ScalarHeaderPhi->getIncomingValueForBlock(LoopScalarPreHeader); | ||
Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin()); | ||
auto *ScalarPreheaderPhi = | ||
Builder.CreatePHI(ScalarHeaderPhi->getType(), 2, "scalar.recur.init"); | ||
for (auto *BB : predecessors(LoopScalarPreHeader)) { | ||
auto *Incoming = BB == LoopMiddleBlock ? ResumeScalarFOR : InitScalarFOR; | ||
ScalarPreheaderPhi->addIncoming(Incoming, BB); | ||
} | ||
ScalarHeaderPhi->setIncomingValueForBlock(LoopScalarPreHeader, | ||
ScalarPreheaderPhi); | ||
Comment on lines
-3391
to
-3392
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that this incoming is set here, rather than by VPLiveIn's fixPhi. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ScalarPreheaderPhi is being generated by ResumePhi instead, and is being set here as the incoming value of ScalarHeaderPhi - w/o any vector-to-scalar extraction - so can ResumePhi (in its current use) entail a vector-to-scalar extraction (as opposed to reusing it for feeding exit block phis)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At the moment, ResumePhi won't generate any extracts, as it will be fed by the extracted scalar resume value of the FOR. Possibly more accurately to mark ResumePhi as only having its first lane used instead of vector-to-scalar. (Done in latest version) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok. Would be good to simply indicate somehow that its dealing with a single scalar - coming in (from each pred) and going out. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added an |
||
} | ||
|
||
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { | ||
// The basic block and loop containing the predicated instruction. | ||
auto *PredBB = PredInst->getParent(); | ||
|
@@ -8725,6 +8679,59 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop, | |
} | ||
} | ||
|
||
/// Feed a resume value for every FOR from the vector loop to the scalar loop, | ||
/// if middle block branches to scalar preheader, by introducing ExtractFromEnd | ||
/// and ResumePhi recipes in each, respectively, and a VPLiveOut which uses the | ||
/// latter and corresponds to the scalar header. | ||
static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) { | ||
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); | ||
|
||
// Start by finding out if middle block branches to scalar preheader, which is | ||
// not a VPIRBasicBlock, unlike Exit block - the other possible successor of | ||
// middle block. | ||
// TODO: Should be replaced by | ||
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the | ||
// scalar region is modeled as well. | ||
VPBasicBlock *ScalarPHVPBB = nullptr; | ||
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor()); | ||
for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) { | ||
if (isa<VPIRBasicBlock>(Succ)) | ||
continue; | ||
assert(!ScalarPHVPBB && "Two candidates for ScalarPHVPBB?"); | ||
ScalarPHVPBB = cast<VPBasicBlock>(Succ); | ||
} | ||
if (!ScalarPHVPBB) | ||
return; | ||
|
||
VPBuilder ScalarPHBuilder(ScalarPHVPBB); | ||
VPBuilder MiddleBuilder(MiddleVPBB); | ||
// Reset insert point so new recipes are inserted before terminator and | ||
// condition, if there is either the former or both. | ||
if (auto *Terminator = MiddleVPBB->getTerminator()) { | ||
auto *Condition = dyn_cast<VPInstruction>(Terminator->getOperand(0)); | ||
assert((!Condition || Condition->getParent() == MiddleVPBB) && | ||
"Condition expected in MiddleVPBB"); | ||
MiddleBuilder.setInsertPoint(Condition ? Condition : Terminator); | ||
} | ||
VPValue *OneVPV = Plan.getOrAddLiveIn( | ||
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); | ||
|
||
for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) { | ||
auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi); | ||
if (!FOR) | ||
continue; | ||
|
||
// Extract the resume value and create a new VPLiveOut for it. | ||
auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd, | ||
{FOR->getBackedgeValue(), OneVPV}, | ||
{}, "vector.recur.extract"); | ||
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp( | ||
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {}, | ||
"scalar.recur.init"); | ||
Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), ResumePhiRecipe); | ||
} | ||
} | ||
|
||
VPlanPtr | ||
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | ||
|
||
|
@@ -8889,6 +8896,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |
"VPBasicBlock"); | ||
RecipeBuilder.fixHeaderPhis(); | ||
|
||
addLiveOutsForFirstOrderRecurrences(*Plan); | ||
|
||
// --------------------------------------------------------------------------- | ||
// Transform initial VPlan: Apply previously taken decisions, in order, to | ||
// bring the VPlan to its final state. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -697,7 +697,10 @@ class VPBlockBase { | |
}; | ||
|
||
/// A value that is used outside the VPlan. The operand of the user needs to be | ||
/// added to the associated LCSSA phi node. | ||
/// added to the associated phi node. The incoming block from VPlan is | ||
/// determined by where the VPValue is defined: if it is defined by a recipe | ||
/// outside a region, its parent block is used, otherwise the middle block is | ||
/// used. | ||
class VPLiveOut : public VPUser { | ||
PHINode *Phi; | ||
|
||
|
@@ -709,11 +712,10 @@ class VPLiveOut : public VPUser { | |
return U->getVPUserID() == VPUser::VPUserID::LiveOut; | ||
} | ||
|
||
/// Fixup the wrapped LCSSA phi node in the unique exit block. This simply | ||
/// means we need to add the appropriate incoming value from the middle | ||
/// block as exiting edges from the scalar epilogue loop (if present) are | ||
/// already in place, and we exit the vector loop exclusively to the middle | ||
/// block. | ||
/// Fix the wrapped phi node. This means adding an incoming value to exit | ||
/// block phi's from the vector loop via middle block (values from scalar loop | ||
/// already reach these phi's), and updating the value to scalar header phi's | ||
/// from the scalar preheader. | ||
void fixPhi(VPlan &Plan, VPTransformState &State); | ||
|
||
/// Returns true if the VPLiveOut uses scalars of operand \p Op. | ||
|
@@ -1237,6 +1239,11 @@ class VPInstruction : public VPRecipeWithIRFlags { | |
SLPStore, | ||
ActiveLaneMask, | ||
ExplicitVectorLength, | ||
/// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan. | ||
/// The first operand is the incoming value from the predecessor in VPlan, | ||
/// the second operand is the incoming value for all other predecessors | ||
/// (which are currently not modeled in VPlan). | ||
ResumePhi, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for renaming, worth updating the title and summary of the patch. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Title + description should be updated now |
||
CalculateTripCountMinusVF, | ||
// Increment the canonical IV separately for each unrolled part. | ||
CanonicalIVIncrementForPart, | ||
|
@@ -1385,6 +1392,10 @@ class VPInstruction : public VPRecipeWithIRFlags { | |
/// Returns true if this VPInstruction produces a scalar value from a vector, | ||
/// e.g. by performing a reduction or extracting a lane. | ||
bool isVectorToScalar() const; | ||
|
||
/// Returns true if this VPInstruction's operands are single scalars and the | ||
/// result is also a single scalar. | ||
bool isSingleScalar() const; | ||
}; | ||
|
||
/// VPWidenRecipe is a recipe for producing a copy of vector type its | ||
|
@@ -3736,7 +3747,7 @@ inline bool isUniformAfterVectorization(VPValue *VPV) { | |
if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def)) | ||
return all_of(GEP->operands(), isUniformAfterVectorization); | ||
if (auto *VPI = dyn_cast<VPInstruction>(Def)) | ||
return VPI->isVectorToScalar(); | ||
return VPI->isSingleScalar() || VPI->isVectorToScalar(); | ||
return false; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -196,9 +196,22 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { | |||||||||||||||||||||||||
: VPLane::getLastLaneForVF(State.VF); | ||||||||||||||||||||||||||
VPBasicBlock *MiddleVPBB = | ||||||||||||||||||||||||||
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()); | ||||||||||||||||||||||||||
BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB]; | ||||||||||||||||||||||||||
Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), | ||||||||||||||||||||||||||
MiddleBB); | ||||||||||||||||||||||||||
VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe(); | ||||||||||||||||||||||||||
auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr; | ||||||||||||||||||||||||||
// Values leaving the vector loop reach live out phi's in the exiting block | ||||||||||||||||||||||||||
// via middle block. | ||||||||||||||||||||||||||
auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion() | ||||||||||||||||||||||||||
? MiddleVPBB | ||||||||||||||||||||||||||
: ExitingVPBB; | ||||||||||||||||||||||||||
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; | ||||||||||||||||||||||||||
// Set insertion point in PredBB in case an extract needs to be generated. | ||||||||||||||||||||||||||
// TODO: Model extracts explicitly. | ||||||||||||||||||||||||||
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth adding a comment that insertion point is set in PrefBB for potential generation of instructions to extract Is this potential post-extract why ResumePhi is considered (maybe!) vector-to-scalar? These extracts best materialize into recipes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a comment + TODO, thanks!
Yes, we need to select the last lane, which may require creating a new extract, if the VPValue is a vector. Added a TODO to model extracts explicitly. |
||||||||||||||||||||||||||
Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane)); | ||||||||||||||||||||||||||
if (Phi->getBasicBlockIndex(PredBB) != -1) | ||||||||||||||||||||||||||
Phi->setIncomingValueForBlock(PredBB, V); | ||||||||||||||||||||||||||
else | ||||||||||||||||||||||||||
Phi->addIncoming(V, PredBB); | ||||||||||||||||||||||||||
Comment on lines
+210
to
+214
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is where we need to figure out which predecessor P corresponds to For live outs that correspond to exit block, P is middle block (which can be retrieved directly from Plan as done now, or from State.CFG.ExitBB, or from State.builder which is (still?) set to ExitBB). Can we check ExitValue's parent to figure out P, in both cases, asserting that its corresponding IRBB is a predecessor of Phi's block? Phi's of exit block know not of middle block, as exit block (and its phi's) is originally connected to scalar loop only. fixPhi()'s documentation above needs update. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated to use the defining recipe's parent, if it is outside a region, otherwise use MiddleVPBB |
||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||||||||||||||||||||||||||
|
@@ -337,7 +350,7 @@ bool VPInstruction::doesGeneratePerAllLanes() const { | |||||||||||||||||||||||||
bool VPInstruction::canGenerateScalarForFirstLane() const { | ||||||||||||||||||||||||||
if (Instruction::isBinaryOp(getOpcode())) | ||||||||||||||||||||||||||
return true; | ||||||||||||||||||||||||||
if (isVectorToScalar()) | ||||||||||||||||||||||||||
if (isSingleScalar() || isVectorToScalar()) | ||||||||||||||||||||||||||
return true; | ||||||||||||||||||||||||||
switch (Opcode) { | ||||||||||||||||||||||||||
case Instruction::ICmp: | ||||||||||||||||||||||||||
|
@@ -637,6 +650,27 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { | |||||||||||||||||||||||||
Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true); | ||||||||||||||||||||||||||
return Builder.CreatePtrAdd(Ptr, Addend, Name); | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
case VPInstruction::ResumePhi: { | ||||||||||||||||||||||||||
if (Part != 0) | ||||||||||||||||||||||||||
return State.get(this, 0, /*IsScalar*/ true); | ||||||||||||||||||||||||||
Value *IncomingFromVPlanPred = | ||||||||||||||||||||||||||
State.get(getOperand(0), Part, /* IsScalar */ true); | ||||||||||||||||||||||||||
Value *IncomingFromOtherPreds = | ||||||||||||||||||||||||||
State.get(getOperand(1), Part, /* IsScalar */ true); | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it conceivable for the scalar preheader to have a single predecessor, namely, VPlanPred/middle-block? I.e., when trip count is known to be larger that VFxUF and no (other) runtime checks are needed to bypass the vector loop? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes that should be possible, but it should be handled correctly (generating a phi with a single incoming value from the VPlan predecessor) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, the second operand (FOR->getStartValue()) is redundant in this case, but exists - so getOperand(1) works. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep |
||||||||||||||||||||||||||
auto *NewPhi = | ||||||||||||||||||||||||||
Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name); | ||||||||||||||||||||||||||
BasicBlock *VPlanPred = | ||||||||||||||||||||||||||
State.CFG | ||||||||||||||||||||||||||
.VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())]; | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that since middle block is a VPIRBasicBlock, its corresponding IRBB can be obtained directly via getIRBasicBlock(). |
||||||||||||||||||||||||||
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred); | ||||||||||||||||||||||||||
for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) { | ||||||||||||||||||||||||||
assert(OtherPred != VPlanPred && | ||||||||||||||||||||||||||
"VPlan predecessors should not be connected yet"); | ||||||||||||||||||||||||||
NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred); | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
Comment on lines
+665
to
+670
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
a bit more logical to first connect all existing predecessors asserting to not be VPlanPred, and then connect the latter(?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, but this would require updating a bunch of tests due to different incoming order. Better to be done as follow up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just had a look at this and this would mean we have different incoming orders for the induction resume values and FOR resume values until we moved induction resume values to also use ResumePhi. WDYT about only adjusting the order once both use ResumePhi? |
||||||||||||||||||||||||||
return NewPhi; | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
default: | ||||||||||||||||||||||||||
llvm_unreachable("Unsupported opcode for instruction"); | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
@@ -647,6 +681,10 @@ bool VPInstruction::isVectorToScalar() const { | |||||||||||||||||||||||||
getOpcode() == VPInstruction::ComputeReductionResult; | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
bool VPInstruction::isSingleScalar() const { | ||||||||||||||||||||||||||
return getOpcode() == VPInstruction::ResumePhi; | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
#if !defined(NDEBUG) | ||||||||||||||||||||||||||
bool VPInstruction::isFPMathOp() const { | ||||||||||||||||||||||||||
// Inspired by FPMathOperator::classof. Notable differences are that we don't | ||||||||||||||||||||||||||
|
@@ -667,9 +705,9 @@ void VPInstruction::execute(VPTransformState &State) { | |||||||||||||||||||||||||
if (hasFastMathFlags()) | ||||||||||||||||||||||||||
State.Builder.setFastMathFlags(getFastMathFlags()); | ||||||||||||||||||||||||||
State.setDebugLocFrom(getDebugLoc()); | ||||||||||||||||||||||||||
bool GeneratesPerFirstLaneOnly = | ||||||||||||||||||||||||||
canGenerateScalarForFirstLane() && | ||||||||||||||||||||||||||
(vputils::onlyFirstLaneUsed(this) || isVectorToScalar()); | ||||||||||||||||||||||||||
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && | ||||||||||||||||||||||||||
(vputils::onlyFirstLaneUsed(this) || | ||||||||||||||||||||||||||
isVectorToScalar() || isSingleScalar()); | ||||||||||||||||||||||||||
bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); | ||||||||||||||||||||||||||
bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this); | ||||||||||||||||||||||||||
for (unsigned Part = 0; Part < State.UF; ++Part) { | ||||||||||||||||||||||||||
|
@@ -721,6 +759,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |||||||||||||||||||||||||
case VPInstruction::CanonicalIVIncrementForPart: | ||||||||||||||||||||||||||
case VPInstruction::BranchOnCount: | ||||||||||||||||||||||||||
case VPInstruction::BranchOnCond: | ||||||||||||||||||||||||||
case VPInstruction::ResumePhi: | ||||||||||||||||||||||||||
return true; | ||||||||||||||||||||||||||
}; | ||||||||||||||||||||||||||
llvm_unreachable("switch should return"); | ||||||||||||||||||||||||||
|
@@ -773,6 +812,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |||||||||||||||||||||||||
case VPInstruction::ActiveLaneMask: | ||||||||||||||||||||||||||
O << "active lane mask"; | ||||||||||||||||||||||||||
break; | ||||||||||||||||||||||||||
case VPInstruction::ResumePhi: | ||||||||||||||||||||||||||
O << "resume-phi"; | ||||||||||||||||||||||||||
break; | ||||||||||||||||||||||||||
case VPInstruction::ExplicitVectorLength: | ||||||||||||||||||||||||||
O << "EXPLICIT-VECTOR-LENGTH"; | ||||||||||||||||||||||||||
break; | ||||||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -937,22 +937,16 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, | |
Type *IntTy = Plan.getCanonicalIV()->getScalarType(); | ||
|
||
// Extract the penultimate value of the recurrence and update VPLiveOut | ||
// users of the recurrence splice. | ||
// users of the recurrence splice. Note that the extract of the final value | ||
// used to resume in the scalar loop is created earlier during VPlan | ||
// construction. | ||
auto *Penultimate = cast<VPInstruction>(MiddleBuilder.createNaryOp( | ||
VPInstruction::ExtractFromEnd, | ||
{FOR->getBackedgeValue(), | ||
Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 2))}, | ||
{}, "vector.recur.extract.for.phi")); | ||
RecurSplice->replaceUsesWithIf( | ||
Penultimate, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); }); | ||
|
||
// Extract the resume value and create a new VPLiveOut for it. | ||
auto *Resume = MiddleBuilder.createNaryOp( | ||
VPInstruction::ExtractFromEnd, | ||
{FOR->getBackedgeValue(), | ||
Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 1))}, | ||
{}, "vector.recur.extract"); | ||
Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), Resume); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that this live-out corresponds to the scalar header and is fed a value (Resume) created in the middle block, i.e., w/o going through the scalar preheader. It is currently wired explicitly, rather than via VPLiveOut::fixPhi(), which as documented deals only with wiring directly to middle block live-outs that correspond to the exit block. Worth leaving behind a comment that the s_resume value is created in the middle block and fed into the scalar header via the scalar preheader, at VPlan construction. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a comment, thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Extended the comment above, thanks! |
||
} | ||
return true; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's more to remove here...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Of course, removed, thanks!