-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[VPlan] Add VPIRInstruction, use for exit block live-outs. #100735
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2f441bb
b7542dc
540b9c5
0284675
82b1669
de86741
66dfc24
9a40ed0
958f130
aab1463
c8a1add
3dc696b
f66d27e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8630,11 +8630,12 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, | |
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL); | ||
} | ||
|
||
// Collect (ExitPhi, ExitingValue) pairs phis in the original exit block that | ||
// are modeled in VPlan. Some exiting values are not modeled explicitly yet and | ||
// won't be included. Those are un-truncated VPWidenIntOrFpInductionRecipe, | ||
// VPWidenPointerInductionRecipe and induction increments. | ||
static MapVector<PHINode *, VPValue *> collectUsersInExitBlock( | ||
// Collect VPIRInstructions for phis in the original exit block that are modeled | ||
// in VPlan and add the exiting VPValue as operand. Some exiting values are not | ||
// modeled explicitly yet and won't be included. Those are un-truncated | ||
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction | ||
// increments. | ||
static SetVector<VPIRInstruction *> collectUsersInExitBlock( | ||
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, | ||
const MapVector<PHINode *, InductionDescriptor> &Inductions) { | ||
auto *MiddleVPBB = | ||
|
@@ -8644,13 +8645,17 @@ static MapVector<PHINode *, VPValue *> collectUsersInExitBlock( | |
// from scalar loop only. | ||
if (MiddleVPBB->getNumSuccessors() != 2) | ||
return {}; | ||
MapVector<PHINode *, VPValue *> ExitingValuesToFix; | ||
BasicBlock *ExitBB = | ||
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock(); | ||
SetVector<VPIRInstruction *> ExitUsersToFix; | ||
VPBasicBlock *ExitVPBB = cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0]); | ||
BasicBlock *ExitingBB = OrigLoop->getExitingBlock(); | ||
for (PHINode &ExitPhi : ExitBB->phis()) { | ||
Value *IncomingValue = | ||
ExitPhi.getIncomingValueForBlock(ExitingBB); | ||
for (VPRecipeBase &R : *ExitVPBB) { | ||
auto *ExitIRI = dyn_cast<VPIRInstruction>(&R); | ||
if (!ExitIRI) | ||
continue; | ||
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction()); | ||
if (!ExitPhi) | ||
break; | ||
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); | ||
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue); | ||
// Exit values for inductions are computed and updated outside of VPlan and | ||
// independent of induction recipes. | ||
|
@@ -8666,17 +8671,18 @@ static MapVector<PHINode *, VPValue *> collectUsersInExitBlock( | |
return P && Inductions.contains(P); | ||
}))) | ||
continue; | ||
ExitingValuesToFix.insert({&ExitPhi, V}); | ||
ExitUsersToFix.insert(ExitIRI); | ||
ExitIRI->addOperand(V); | ||
} | ||
return ExitingValuesToFix; | ||
return ExitUsersToFix; | ||
} | ||
|
||
// Add exit values to \p Plan. Extracts and VPLiveOuts are added for each entry | ||
// in \p ExitingValuesToFix. | ||
// Add exit values to \p Plan. Extracts are added for each entry in \p | ||
// ExitUsersToFix if needed and their operands are updated. | ||
static void | ||
addUsersInExitBlock(VPlan &Plan, | ||
MapVector<PHINode *, VPValue *> &ExitingValuesToFix) { | ||
if (ExitingValuesToFix.empty()) | ||
const SetVector<VPIRInstruction *> &ExitUsersToFix) { | ||
if (ExitUsersToFix.empty()) | ||
return; | ||
|
||
auto *MiddleVPBB = | ||
|
@@ -8685,18 +8691,19 @@ addUsersInExitBlock(VPlan &Plan, | |
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock(); | ||
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); | ||
|
||
// Introduce VPUsers modeling the exit values. | ||
for (const auto &[ExitPhi, V] : ExitingValuesToFix) { | ||
// Introduce extract for exiting values and update the VPIRInstructions | ||
// modeling the corresponding LCSSA phis. | ||
for (VPIRInstruction *ExitIRI : ExitUsersToFix) { | ||
VPValue *V = ExitIRI->getOperand(0); | ||
// Pass live-in values used by exit phis directly through to the live-out. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Unrelated) Do such values that bypass the vector loop exist, can VPlan ignore them. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We still need to add incoming values for new edges added, I think, which at the moment is modeled naturally in VPlan There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Phi's in exit block that are fed by live-in values which bypass the original loop, seem redundant - should they be considered lcssa phi's? Can be checked later, independently. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They are redundant, but I think there were some cases where it made it through the pipeline (there's a number of cases where various unsimplified constructs make it to LV), which caused adding https://github.com/llvm/llvm-project/blob/main/llvm/test/Transforms/LoopVectorize/extract-from-end-vector-constant.ll#L35. We could alternatively remove them after vectorization, but this cannot be done in VPlan, as the users of the exit phi are not modeled. Doing a separate cleanup outside of VPlan would also be an option, but less desirable than the current solution? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, optimizing away redundant phi's in exit block that are fed by live-in values which bypass the original loop, and which do not qualify as lcssa phi's, best be done before LV, which relies on (proper) lcssa form. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, but we still need to handle incoming IR that may not be simplified. Or would you prefer to run some pre-cleanup passes before vectorizing? At the moment, I think there is no easy way to do this cleanup before creating VPlans and not modifying the original IR, if we are not vectorizing. |
||
if (V->isLiveIn()) { | ||
Plan.addLiveOut(ExitPhi, V); | ||
if (V->isLiveIn()) | ||
continue; | ||
} | ||
|
||
VPValue *Ext = B.createNaryOp( | ||
VPInstruction::ExtractFromEnd, | ||
{V, Plan.getOrAddLiveIn(ConstantInt::get( | ||
IntegerType::get(ExitBB->getContext(), 32), 1))}); | ||
Plan.addLiveOut(ExitPhi, Ext); | ||
ExitIRI->setOperand(0, Ext); | ||
} | ||
} | ||
|
||
|
@@ -8709,7 +8716,7 @@ addUsersInExitBlock(VPlan &Plan, | |
/// 2. Feed the penultimate value of recurrences to their LCSSA phi users in | ||
/// the original exit block using a VPLiveOut. | ||
static void addLiveOutsForFirstOrderRecurrences( | ||
VPlan &Plan, MapVector<PHINode *, VPValue *> &ExitingValuesToFix) { | ||
VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix) { | ||
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); | ||
|
||
// Start by finding out if middle block branches to scalar preheader, which is | ||
|
@@ -8726,14 +8733,14 @@ static void addLiveOutsForFirstOrderRecurrences( | |
ExitBB = | ||
cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors()[0])->getIRBasicBlock(); | ||
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]); | ||
} else if (ExitingValuesToFix.empty()) { | ||
} else if (ExitUsersToFix.empty()) { | ||
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor()); | ||
} else { | ||
ExitBB = cast<VPIRBasicBlock>(MiddleVPBB->getSingleSuccessor()) | ||
->getIRBasicBlock(); | ||
} | ||
if (!ScalarPHVPBB) { | ||
assert(ExitingValuesToFix.empty() && | ||
assert(ExitUsersToFix.empty() && | ||
"missed inserting extracts for exiting values"); | ||
return; | ||
} | ||
|
@@ -8827,24 +8834,17 @@ static void addLiveOutsForFirstOrderRecurrences( | |
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr()); | ||
Plan.addLiveOut(FORPhi, ResumePhiRecipe); | ||
|
||
// Now create VPLiveOuts for users in the exit block. | ||
// Extract the penultimate value of the recurrence and add VPLiveOut | ||
// users of the recurrence splice. | ||
|
||
// No edge from the middle block to the unique exit block has been inserted | ||
// and there is nothing to fix from vector loop; phis should have incoming | ||
// from scalar loop only. | ||
if (ExitingValuesToFix.empty()) | ||
continue; | ||
for (User *U : FORPhi->users()) { | ||
auto *UI = cast<Instruction>(U); | ||
if (UI->getParent() != ExitBB) | ||
// Now update VPIRInstructions modeling LCSSA phis in the exit block. | ||
// Extract the penultimate value of the recurrence and use it as operand for | ||
// the VPIRInstruction modeling the phi. | ||
for (VPIRInstruction *ExitIRI : ExitUsersToFix) { | ||
if (ExitIRI->getOperand(0) != FOR) | ||
continue; | ||
VPValue *Ext = MiddleBuilder.createNaryOp( | ||
VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {}, | ||
"vector.recur.extract.for.phi"); | ||
Plan.addLiveOut(cast<PHINode>(UI), Ext); | ||
ExitingValuesToFix.erase(cast<PHINode>(UI)); | ||
ExitIRI->setOperand(0, Ext); | ||
ExitUsersToFix.remove(ExitIRI); | ||
} | ||
} | ||
} | ||
|
@@ -9006,11 +9006,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |
"VPBasicBlock"); | ||
RecipeBuilder.fixHeaderPhis(); | ||
|
||
MapVector<PHINode *, VPValue *> ExitingValuesToFix = collectUsersInExitBlock( | ||
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock( | ||
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); | ||
|
||
addLiveOutsForFirstOrderRecurrences(*Plan, ExitingValuesToFix); | ||
addUsersInExitBlock(*Plan, ExitingValuesToFix); | ||
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix); | ||
addUsersInExitBlock(*Plan, ExitUsersToFix); | ||
|
||
// --------------------------------------------------------------------------- | ||
// Transform initial VPlan: Apply previously taken decisions, in order, to | ||
|
@@ -10128,7 +10127,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |
// directly in VPlan. | ||
EpilogILV.setTripCount(MainILV.getTripCount()); | ||
for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) { | ||
auto *ExpandR = cast<VPExpandSCEVRecipe>(&R); | ||
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R); | ||
if (!ExpandR) | ||
continue; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just noting: the (pre)preheader VPIRBB may now have VPIRI's (that need to be skipped here) in addition to VPExpandSCEVRecipe. |
||
auto *ExpandedVal = BestEpiPlan.getOrAddLiveIn( | ||
ExpandedSCEVs.find(ExpandR->getSCEV())->second); | ||
ExpandR->replaceAllUsesWith(ExpandedVal); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -860,10 +860,18 @@ VPlan::~VPlan() { | |
delete BackedgeTakenCount; | ||
} | ||
|
||
static VPIRBasicBlock *createVPIRBasicBlockFor(BasicBlock *BB) { | ||
auto *VPIRBB = new VPIRBasicBlock(BB); | ||
for (Instruction &I : | ||
make_range(BB->begin(), BB->getTerminator()->getIterator())) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would a simple There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is intentional to skip the terminators and it allows to insert new recipes at the end of the VPIRBB. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is ok, adding some further thoughts: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, only partially populating would probably be a bit more logic to skip undesired parts. Not adding the branches also have the benefit that we don't remove the original terminator VPIRI from the middle block when adding the VPlan-modeled branch recipes. |
||
VPIRBB->appendRecipe(new VPIRInstruction(I)); | ||
return VPIRBB; | ||
} | ||
|
||
VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, | ||
bool RequiresScalarEpilogueCheck, | ||
bool TailFolded, Loop *TheLoop) { | ||
VPIRBasicBlock *Entry = new VPIRBasicBlock(TheLoop->getLoopPreheader()); | ||
VPIRBasicBlock *Entry = createVPIRBasicBlockFor(TheLoop->getLoopPreheader()); | ||
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); | ||
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader); | ||
Plan->TripCount = | ||
|
@@ -895,7 +903,7 @@ VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE, | |
// we unconditionally branch to the scalar preheader. Do nothing. | ||
// 3) Otherwise, construct a runtime check. | ||
BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); | ||
auto *VPExitBlock = new VPIRBasicBlock(IRExitBlock); | ||
auto *VPExitBlock = createVPIRBasicBlockFor(IRExitBlock); | ||
// The connection order corresponds to the operands of the conditional branch. | ||
VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); | ||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
|
@@ -972,7 +980,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, | |
/// predecessor, which is rewired to the new VPIRBasicBlock. All successors of | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (independent) All recipes of VPBB are moved to the beginning of the newly created VPIRBB, followed by VPIRI's wrapping IRBB's instructions. IRBB must be free of phi's? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth clarifying and asserting (independently). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done in f66509b. IIUC the recipes from VPBB are moved after the existing VPIRInstructions in IRMiddleVPB |
||
/// VPBB, if any, are rewired to the new VPIRBasicBlock. | ||
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) { | ||
VPIRBasicBlock *IRMiddleVPBB = new VPIRBasicBlock(IRBB); | ||
VPIRBasicBlock *IRMiddleVPBB = createVPIRBasicBlockFor(IRBB); | ||
for (auto &R : make_early_inc_range(*VPBB)) | ||
R.moveBefore(*IRMiddleVPBB, IRMiddleVPBB->end()); | ||
VPBlockBase *PredVPBB = VPBB->getSinglePredecessor(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -936,8 +936,9 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { | |
case VPRecipeBase::VPReductionPHISC: | ||
case VPRecipeBase::VPScalarCastSC: | ||
return true; | ||
case VPRecipeBase::VPInterleaveSC: | ||
case VPRecipeBase::VPBranchOnMaskSC: | ||
case VPRecipeBase::VPInterleaveSC: | ||
case VPRecipeBase::VPIRInstructionSC: | ||
case VPRecipeBase::VPWidenLoadEVLSC: | ||
case VPRecipeBase::VPWidenLoadSC: | ||
case VPRecipeBase::VPWidenStoreEVLSC: | ||
|
@@ -1405,6 +1406,45 @@ class VPInstruction : public VPRecipeWithIRFlags { | |
bool isSingleScalar() const; | ||
}; | ||
|
||
/// A recipe to wrap on original IR instruction not to be modified during | ||
/// execution, execept for PHIs. For PHIs, a single VPValue operand is allowed, | ||
/// and it is used to add a new incoming value for the single predecessor VPBB. | ||
/// Expect PHIs, VPIRInstructions cannot have any operands. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would a separate (sub)recipe for phi's be better? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could do if preferred, but it would pull in additional complexity for relatively small gains? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could be, OTOH may simplify rather than complicate, worth evaluating, possibly as a follow-up. |
||
class VPIRInstruction : public VPRecipeBase { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could be a VPSingleDefRecipe, as IR instructions define at most one value, but this currently serves only as a placeholder to advance the insert position, and as a live-out use/operand. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's at the moment intentionally not a VPSingleDefRecipe, as VPIRIntructions at the moment don't need their def-use chains modeled in VPlan. Making them VPSingleDefRecipe without also modeling VPIRInstruction's operands as VPValues would probably be q bit surprising as the def-use chains aren't connected. Might be good as follow-up, however we would still need to decide how to deal with exit phis that have their original operand coming from the scalar loop, for which is not yet modeled in VPlan. |
||
Instruction &I; | ||
|
||
public: | ||
VPIRInstruction(Instruction &I) | ||
: VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {} | ||
|
||
~VPIRInstruction() override = default; | ||
|
||
VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC) | ||
|
||
VPIRInstruction *clone() override { | ||
auto *R = new VPIRInstruction(I); | ||
for (auto *Op : operands()) | ||
R->addOperand(Op); | ||
return R; | ||
} | ||
|
||
void execute(VPTransformState &State) override; | ||
|
||
Instruction &getInstruction() { return I; } | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
/// Print the recipe. | ||
void print(raw_ostream &O, const Twine &Indent, | ||
VPSlotTracker &SlotTracker) const override; | ||
#endif | ||
|
||
bool usesScalars(const VPValue *Op) const override { | ||
assert(is_contained(operands(), Op) && | ||
"Op must be an operand of the recipe"); | ||
return true; | ||
} | ||
}; | ||
|
||
/// VPWidenRecipe is a recipe for producing a widened instruction using the | ||
/// opcode and operands of the recipe. This recipe covers most of the | ||
/// traditional vectorization cases where each recipe transforms into a | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -867,6 +867,43 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |
} | ||
#endif | ||
|
||
void VPIRInstruction::execute(VPTransformState &State) { | ||
assert((isa<PHINode>(&I) || getNumOperands() == 0) && | ||
"Only PHINodes can have extra operands"); | ||
if (getNumOperands() == 1) { | ||
VPValue *ExitValue = getOperand(0); | ||
auto Lane = vputils::isUniformAfterVectorization(ExitValue) | ||
? VPLane::getFirstLane() | ||
: VPLane::getLastLaneForVF(State.VF); | ||
auto *PredVPBB = cast<VPBasicBlock>(getParent()->getSinglePredecessor()); | ||
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; | ||
// Set insertion point in PredBB in case an extract needs to be generated. | ||
// TODO: Model extracts explicitly. | ||
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); | ||
Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane)); | ||
auto *Phi = cast<PHINode>(&I); | ||
Phi->addIncoming(V, PredBB); | ||
} | ||
|
||
// Advance the insert point after the wrapped IR instruction. This allows | ||
// interleaving VPIRInstructions and other recipes. | ||
State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator())); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this simplify VPIRBasicBlock::execute(), where this setInsertPoint() replaces the one there. Independent: can VPBasicBlock::execute() be simplified, given VPIRBasicBlocks: "A. the first VPBB reuses the loop pre-header BB" - should this reuse be modelled by fusing into a VPIRBasicBlock? Further cases should be simplified once replicate regions are unfolded as a VPlan2VPlan transformation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Unfortunately not yet; We still need to set the default insert point for empty VPIRBasicBlocks and also some IR instructions may be created in VPIRBasicBlock (the pre-preheader) only during skeleton creation, which will be missing from the VPIRBB. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth explaining regarding setting the insertion point, that VPIRBBs and VPIRIs facilitate introducing recipes among existing instructions in existing BBs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a comment, thanks! |
||
} | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, | ||
VPSlotTracker &SlotTracker) const { | ||
O << Indent << "IR " << I; | ||
|
||
if (getNumOperands() != 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This, e.g., must be true for VPIRPhi's and false for other VPIRI's. |
||
assert(getNumOperands() == 1 && "can have at most 1 operand"); | ||
O << " (extra operand: "; | ||
printOperands(O, SlotTracker); | ||
O << ")"; | ||
} | ||
} | ||
#endif | ||
|
||
void VPWidenCallRecipe::execute(VPTransformState &State) { | ||
assert(State.VF.isVector() && "not widening"); | ||
Function *CalledScalarFn = getCalledScalarFunction(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -339,6 +339,7 @@ class VPDef { | |
VPBranchOnMaskSC, | ||
VPDerivedIVSC, | ||
VPExpandSCEVSC, | ||
VPIRInstructionSC, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Recipes that represent phi's need to appear below, for isPhi() to work. Should VPIRInstruction be split into There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Below is just for header phis, so I think it wouldn't apply of VPIRPhi at least for now. I've left it for now as is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's also for PredInstPhi, a non-loop-header if-then hammock phi of a replicate region. It should be for all phi's planned to be generated - to help keep them together at the start of their basic block. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah yes, we have a header phi ids are grouped together separately. |
||
VPInstructionSC, | ||
VPInterleaveSC, | ||
VPReductionEVLSC, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,6 +58,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur | |
; CHECK-NEXT: vp<%2> = original trip-count | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<for.body.preheader>: | ||
; CHECK-NEXT: IR %0 = zext i32 %n to i64 | ||
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) | ||
Comment on lines
+61
to
62
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just noting: VPIRBB with both VPIRI and non-VPIRI recipes. |
||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
|
@@ -141,6 +142,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur | |
; CHECK-NEXT: vp<%2> = original trip-count | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<for.body.preheader>: | ||
; CHECK-NEXT: IR %0 = zext i32 %n to i64 | ||
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
|
@@ -260,6 +262,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur | |
; CHECK-NEXT: vp<%2> = original trip-count | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<for.body.preheader>: | ||
; CHECK-NEXT: IR %0 = zext i32 %n to i64 | ||
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
|
@@ -343,6 +346,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur | |
; CHECK-NEXT: vp<%2> = original trip-count | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: ir-bb<for.body.preheader>: | ||
; CHECK-NEXT: IR %0 = zext i32 %n to i64 | ||
; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64) | ||
; CHECK-NEXT: No successors | ||
; CHECK-EMPTY: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Breaking because all remaining recipes are expected to be non phi's? Should the lcssa VPIRI's be considered phi recipes (as do loop header phi recipes) to help ensure they appear before non-phi recipes (VPIRI's) in the VP(IR)BB?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes.
We could but this would pull in more complexity in this patch without much benefits initially. Perhaps better done once a more concrete need arises/as follow up?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A separate VPIRPhi subclass of VIPIRInstruction dedicated to phi's would arguably model the distinction between the two more accurately, and help simplify their documentation, operand behavior, execute(), keeping phi's together, iterating over them, etc., but this can be considered as a follow-up.