Merged

Commits (changes shown are from 11 of 18 commits):
b7b23a9  [LV] Use ExtractLane(LastActiveLane, V) live outs when tail-folding. (fhahn, Oct 2, 2025)
9985387  Merge remote-tracking branch 'origin/main' into lv-tf-external-users (fhahn, Oct 13, 2025)
6b0c9d2  !fixup add LastActiveLane opcode, lowered via FirstActiveLane. (fhahn, Oct 13, 2025)
2a45f94  Merge remote-tracking branch 'origin/main' into lv-tf-external-users (fhahn, Oct 15, 2025)
7d1ab3c  !fixup verify LastActiveLane. (fhahn, Oct 15, 2025)
8ca0426  Merge remote-tracking branch 'origin/main' into lv-tf-external-users (fhahn, Oct 15, 2025)
96c827b  [VPlan] Mark VPlan argument in isHeaderMask as const (NFC). (fhahn, Oct 15, 2025)
72f3796  !fixup clean up verifier changes (fhahn, Oct 15, 2025)
f3fdac7  Merge remote-tracking branch 'origin/main' into lv-tf-external-users (fhahn, Oct 16, 2025)
af82079  !fixup add note about operands needing to be prefix-masks. (fhahn, Oct 16, 2025)
cc7e913  !fixup fix formatting (fhahn, Oct 16, 2025)
2d3012f  Merge remote-tracking branch 'origin/main' into lv-tf-external-users (fhahn, Oct 23, 2025)
4fa7442  !fixup tighten verifier check for broadcast of EVL (fhahn, Oct 23, 2025)
ae81e08  Merge remote-tracking branch 'origin/main' into lv-tf-external-users (fhahn, Nov 11, 2025)
f10fa37  !fixup add additional pattern matching helpers. (fhahn, Nov 11, 2025)
2758f96  Merge remote-tracking branch 'origin/main' into lv-tf-external-users (fhahn, Nov 11, 2025)
73b3197  Merge remote-tracking branch 'origin/main' into lv-tf-external-users (fhahn, Nov 12, 2025)
d8e77ca  !fixup address comments, thanks (fhahn, Nov 12, 2025)
18 changes: 0 additions & 18 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -2096,24 +2096,6 @@ bool LoopVectorizationLegality::canFoldTailByMasking() const {
for (const auto &Reduction : getReductionVars())
ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());

// TODO: handle non-reduction outside users when tail is folded by masking.
for (auto *AE : AllowedExit) {
// Check that all users of allowed exit values are inside the loop or
// are the live-out of a reduction.
if (ReductionLiveOuts.count(AE))
continue;
for (User *U : AE->users()) {
Instruction *UI = cast<Instruction>(U);
if (TheLoop->contains(UI))
continue;
LLVM_DEBUG(
dbgs()
<< "LV: Cannot fold tail by masking, loop has an outside user for "
<< *UI << "\n");
return false;
}
}

for (const auto &Entry : getInductionVars()) {
PHINode *OrigPhi = Entry.first;
Contributor: Does the PR handle the IV liveout user as well?

Author (fhahn): This patch still leaves inductions untouched for now.

for (User *U : OrigPhi->users()) {
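The deleted bailout used to disable tail folding by masking whenever the loop had a non-reduction value with users outside the loop. A minimal sketch of such a loop shape (names and values are illustrative, not taken from the PR's tests):

```cpp
#include <cassert>

// Illustrative only: a loop the deleted check used to reject for tail folding
// by masking, because `Last` is a non-reduction value used outside the loop.
// The PR handles such live-outs by extracting the lane of the last active
// iteration instead; inductions are still left untouched (see comment above).
static int lastDoubled(const int *A, int N) {
  int Last = 0;
  for (int I = 0; I < N; ++I)
    Last = A[I] * 2; // loop-varying, non-reduction value
  return Last;       // outside user of the loop-defined value
}

int main() {
  int A[] = {1, 2, 3, 4, 5};
  assert(lastDoubled(A, 5) == 10);
  return 0;
}
```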
6 changes: 5 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8803,7 +8803,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (FinalReductionResult == U || Parent->getParent())
continue;
U->replaceUsesOfWith(OrigExitingVPV, FinalReductionResult);
if (match(U, m_ExtractLastElement(m_VPValue())))
if (match(U,
m_CombineOr(m_VPInstruction<VPInstruction::ExtractLastElement>(
m_VPValue()),
m_VPInstruction<VPInstruction::ExtractLane>(
m_VPValue(), m_VPValue()))))
cast<VPInstruction>(U)->replaceAllUsesWith(FinalReductionResult);
}

7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1036,6 +1036,13 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
// It produces the lane index across all unrolled iterations. Unrolling will
// add all copies of its original operand as additional operands.
FirstActiveLane,
// Calculates the last active lane index of the vector predicate operands.
// The predicates must be prefix-masks (all 1s before all 0s). Used when
// tail-folding to extract the correct live-out value from the last active
// iteration. It produces the lane index across all unrolled iterations.
// Unrolling will add all copies of its original operand as additional
// operands.
LastActiveLane,

// The opcodes below are used for VPInstructionWithType.
//
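As a rough scalar model of the documented semantics (assuming, as the comment requires, that the operand is a prefix mask), LastActiveLane yields the index of the last set lane, i.e. one less than the number of leading active lanes. A minimal sketch, not the actual VPlan lowering:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Scalar sketch of LastActiveLane's documented semantics, assuming the operand
// is a prefix mask (all 1s before all 0s).
static uint64_t lastActiveLane(const std::vector<bool> &PrefixMask) {
  uint64_t ActiveLanes = 0;
  while (ActiveLanes < PrefixMask.size() && PrefixMask[ActiveLanes])
    ++ActiveLanes;
  assert(ActiveLanes > 0 && "expected at least one active lane");
  return ActiveLanes - 1; // index of the last active lane
}

int main() {
  // Final tail-folded iteration with 3 of 4 lanes active.
  assert(lastActiveLane({true, true, true, false}) == 2);
  // Fully active vector iteration.
  assert(lastActiveLane({true, true, true, true}) == 3);
  return 0;
}
```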
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -114,6 +114,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::ExtractLane:
return inferScalarType(R->getOperand(1));
case VPInstruction::FirstActiveLane:
case VPInstruction::LastActiveLane:
return Type::getIntNTy(Ctx, 64);
case VPInstruction::ExtractLastElement:
case VPInstruction::ExtractLastLanePerPart:
10 changes: 10 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -406,6 +406,16 @@ m_FirstActiveLane(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::FirstActiveLane>(Op0);
}

template <typename Op0_t>
inline VPInstruction_match<VPInstruction::LastActiveLane, Op0_t>
m_LastActiveLane(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::LastActiveLane>(Op0);
}

inline VPInstruction_match<VPInstruction::StepVector> m_StepVector() {
return m_VPInstruction<VPInstruction::StepVector>();
}

template <unsigned Opcode, typename Op0_t>
inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) {
return AllRecipe_match<Opcode, Op0_t>(Op0);
43 changes: 38 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -44,11 +44,6 @@ class VPPredicator {
/// possibly inserting new recipes at \p Dst (using Builder's insertion point)
VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst);

/// Returns the *entry* mask for \p VPBB.
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
return BlockMaskCache.lookup(VPBB);
}

/// Record \p Mask as the *entry* mask of \p VPBB, which is expected to not
/// already have a mask.
void setBlockInMask(VPBasicBlock *VPBB, VPValue *Mask) {
@@ -68,6 +63,11 @@ class VPPredicator {
}

public:
/// Returns the *entry* mask for \p VPBB.
VPValue *getBlockInMask(VPBasicBlock *VPBB) const {
return BlockMaskCache.lookup(VPBB);
}

/// Returns the precomputed predicate of the edge from \p Src to \p Dst.
VPValue *getEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst) const {
return EdgeMaskCache.lookup({Src, Dst});
@@ -301,5 +301,38 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {

PrevVPBB = VPBB;
}

// If we folded the tail and introduced a header mask, any extract of the
// last element must be updated to extract from the last active lane of the
// header mask instead (i.e., the lane corresponding to the last active
// iteration).
if (FoldTail) {
assert(Plan.getExitBlocks().size() == 1 &&
"only a single-exit block is supported currently");
VPBasicBlock *EB = Plan.getExitBlocks().front();
assert(EB->getSinglePredecessor() == Plan.getMiddleBlock() &&
"the exit block must have middle block as single predecessor");

VPValue *LastActiveLane = nullptr;
VPBuilder B(Plan.getMiddleBlock()->getTerminator());
for (auto &P : EB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&P);
VPValue *Inc = ExitIRI->getIncomingValue(0);
VPValue *Op;
if (!match(Inc, m_ExtractLastElement(m_VPValue(Op))))
continue;

if (!LastActiveLane) {
// Compute the index of the last active lane.
VPValue *HeaderMask = Predicator.getBlockInMask(
Plan.getVectorLoopRegion()->getEntryBasicBlock());
LastActiveLane =
B.createNaryOp(VPInstruction::LastActiveLane, {HeaderMask});
Contributor: Do we need to cache the last active lane, or can we let CSE take care of it? Seeing as I guess we're relying on removeDeadRecipes to get rid of the old ExtractLastElement.

Author (fhahn): Yes, that works now; removed, thanks.

Contributor: I think you can use the singleton ArrayRef constructor. Suggested change:
-            B.createNaryOp(VPInstruction::LastActiveLane, {HeaderMask});
+            B.createNaryOp(VPInstruction::LastActiveLane, HeaderMask);

Author (fhahn): Yep, updated, thanks.

}
auto *Ext =
B.createNaryOp(VPInstruction::ExtractLane, {LastActiveLane, Op});
Inc->replaceAllUsesWith(Ext);
}
}
return Predicator.getBlockMaskCache();
}
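A scalar sketch of what the rewrite above produces: under tail folding, the live-out no longer sits in the last vector lane, so ExtractLastElement is replaced by ExtractLane at the index computed by LastActiveLane of the header mask. Values below are assumed for illustration:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Scalar model of ExtractLane(LastActiveLane(HeaderMask), Vec); data and names
// are illustrative, not generated by the pass.
static int extractLiveOut(const std::vector<int> &Vec,
                          const std::vector<bool> &HeaderMask) {
  uint64_t LastActive = 0;
  for (uint64_t I = 0; I < HeaderMask.size(); ++I)
    if (HeaderMask[I])
      LastActive = I;
  return Vec[LastActive];
}

int main() {
  // Trip count 6, VF 4: the second (final) vector iteration has only two
  // active lanes, so the live-out comes from lane 1, not from lane 3 as an
  // unconditional ExtractLastElement would have returned.
  assert(extractLiveOut({50, 51, 52, 53}, {true, true, false, false}) == 51);
  return 0;
}
```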
37 changes: 37 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -514,6 +514,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
case VPInstruction::LastActiveLane:
case VPInstruction::Not:
return 1;
case Instruction::ICmp:
@@ -640,6 +641,14 @@ Value *VPInstruction::generate(VPTransformState &State) {
}
case Instruction::Select: {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
if (!OnlyFirstLaneUsed) {
auto IsVectorToScalar = [](VPValue *V) {
auto *VI = dyn_cast<VPInstruction>(V);
return VI && VI->isVectorToScalar();
};
OnlyFirstLaneUsed =
IsVectorToScalar(getOperand(1)) && IsVectorToScalar(getOperand(2));
}
Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
@@ -1120,6 +1129,29 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
{PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});
return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
}
case VPInstruction::LastActiveLane: {
Type *ScalarTy = Ctx.Types.inferScalarType(getOperand(0));
if (VF.isScalar())
return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
CmpInst::makeCmpResultType(ScalarTy),
CmpInst::ICMP_EQ, Ctx.CostKind);
// Calculate the cost of determining the lane index: NOT + cttz_elts + SUB.
auto *PredTy = toVectorTy(ScalarTy, VF);
IntrinsicCostAttributes Attrs(Intrinsic::experimental_cttz_elts,
Type::getInt64Ty(Ctx.LLVMCtx),
{PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});
InstructionCost Cost = Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
// Add cost of NOT operation on the predicate.
Cost += Ctx.TTI.getArithmeticInstrCost(
Instruction::Xor, PredTy, Ctx.CostKind,
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
{TargetTransformInfo::OK_UniformConstantValue,
TargetTransformInfo::OP_None});
// Add cost of SUB operation on the index.
Cost += Ctx.TTI.getArithmeticInstrCost(
Instruction::Sub, Type::getInt64Ty(Ctx.LLVMCtx), Ctx.CostKind);
return Cost;
}
case VPInstruction::FirstOrderRecurrenceSplice: {
assert(VF.isVector() && "Scalar FirstOrderRecurrenceSplice?");
SmallVector<int> Mask(VF.getKnownMinValue());
@@ -1174,6 +1206,7 @@ bool VPInstruction::isVectorToScalar() const {
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::ExtractLane ||
getOpcode() == VPInstruction::FirstActiveLane ||
getOpcode() == VPInstruction::LastActiveLane ||
getOpcode() == VPInstruction::ComputeAnyOfResult ||
getOpcode() == VPInstruction::ComputeFindIVResult ||
getOpcode() == VPInstruction::ComputeReductionResult ||
@@ -1237,6 +1270,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::ActiveLaneMask:
case VPInstruction::FirstActiveLane:
case VPInstruction::LastActiveLane:
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
case VPInstruction::Not:
@@ -1411,6 +1445,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::FirstActiveLane:
O << "first-active-lane";
break;
case VPInstruction::LastActiveLane:
O << "last-active-lane";
break;
case VPInstruction::ReductionStartVector:
O << "reduction-start-vector";
break;
55 changes: 55 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1965,6 +1965,33 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
// Set the first operand of RecurSplice to FOR again, after replacing
// all users.
RecurSplice->setOperand(0, FOR);

// Check for users extracting the second-to-last active lane of the FOR. If
// only a single lane is active in the current iteration, we need to select
// the last element of the value from the previous iteration, directly from
// the FOR phi.
for (VPUser *U : RecurSplice->users()) {
if (!match(U,
m_VPInstruction<VPInstruction::ExtractLane>(
m_LastActiveLane(m_VPValue()), m_Specific(RecurSplice))))
continue;

VPBuilder B(cast<VPInstruction>(U));
VPValue *LastActiveLane = cast<VPInstruction>(U)->getOperand(0);
Type *I64Ty = Type::getInt64Ty(Plan.getContext());
VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 0));
VPValue *One = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 1));
VPValue *PenultimateIndex =
B.createNaryOp(Instruction::Sub, {LastActiveLane, One});
VPValue *PenultimateLastIter =
B.createNaryOp(VPInstruction::ExtractLane,
{PenultimateIndex, FOR->getBackedgeValue()});
VPValue *LastPrevIter =
B.createNaryOp(VPInstruction::ExtractLastElement, {FOR});
Contributor: Suggested change:
-          B.createNaryOp(VPInstruction::ExtractLastElement, {FOR});
+          B.createNaryOp(VPInstruction::ExtractLastElement, FOR);

Author (fhahn): Done, thanks.

VPValue *Cmp = B.createICmp(CmpInst::ICMP_EQ, LastActiveLane, Zero);
VPValue *Sel = B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
cast<VPInstruction>(U)->replaceAllUsesWith(Sel);
}
}
return true;
}
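A scalar sketch of the select built in the hunk above (values are assumed for illustration): the recurrence value from the iteration before the last active one is lane LastActiveLane - 1 of the current vector, unless only lane 0 is active, in which case it is the last element carried in through the FOR phi from the previous vector iteration.

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Scalar model of: Cmp = (LastActiveLane == 0);
//                  Sel = Cmp ? ExtractLastElement(FORPhi)
//                            : ExtractLane(LastActiveLane - 1, BackedgeValue);
// Illustrative only; these are not the VPlan recipes themselves.
static int previousIterationValue(const std::vector<int> &ForPhiVec,
                                  const std::vector<int> &BackedgeVec,
                                  uint64_t LastActiveLane) {
  if (LastActiveLane == 0)
    return ForPhiVec.back();            // last value of the previous vector iteration
  return BackedgeVec[LastActiveLane - 1]; // value of the previous scalar iteration
}

int main() {
  std::vector<int> Prev = {10, 11, 12, 13}; // FOR phi (previous vector iteration)
  std::vector<int> Cur = {20, 21, 22, 23};  // backedge value (current iteration)
  assert(previousIterationValue(Prev, Cur, 2) == 21);
  assert(previousIterationValue(Prev, Cur, 0) == 13);
  return 0;
}
```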
@@ -3362,6 +3389,34 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
ToRemove.push_back(Expr);
}

// Expand LastActiveLane into Not + FirstActiveLane + Sub.
auto *LastActiveL = dyn_cast<VPInstruction>(&R);
if (LastActiveL &&
LastActiveL->getOpcode() == VPInstruction::LastActiveLane) {
// Create Not(Mask) for all operands.
SmallVector<VPValue *, 2> NotMasks;
for (VPValue *Op : LastActiveL->operands()) {
VPValue *NotMask = Builder.createNot(Op, LastActiveL->getDebugLoc());
NotMasks.push_back(NotMask);
}

// Create FirstActiveLane on the inverted masks.
VPValue *FirstInactiveLane = Builder.createNaryOp(
Contributor: I think we have to be careful about this transformation, because it makes the assumption that there are no holes in the mask. For example, you'd presumably get something unexpected if you transform LastActiveLane(11100111000) -> FirstActiveLane(00011000111). It might require adding documentation to the opcodes saying that holes are not permitted when using the LastActiveLane opcode. I don't know if there is a way to add an assert here that the mask doesn't contain holes? Presumably it should only be used when the mask originally came from get.active.lane.mask?

Author (fhahn): Yep, for now the operands will always be header masks (or EVL-based masks), so we can verify that (added to VPlanVerifier). Updated and extended the comment for the opcode, thanks.

Collaborator: I've added a similar opcode (ExtractLastActive) as part of https://github.com/llvm/llvm-project/pull/158088/files#diff-dabd8bf9956285f5ddab712af4e0494647a106234f07283b0bd9f8b4d9b887ca, and that one does expect that there might be holes. Should we keep the opcodes separate, or express one in terms of the other and enforce the 'contiguousness' of active lanes directly for your code?

Author (fhahn): Having the LastActiveLane check separate has the potential advantage that it could expose more CSE/simplification opportunities. The restriction to prefix-masks at the moment is only there due to how it gets lowered using FirstActiveLane. I think we can just remove the restriction once we have a use case for non-prefix masks and add dedicated lowering that supports them.

Contributor: I really can't think of when we'd end up using LastActiveLane with anything but the header mask. Would it be simpler to make LastActiveLane a nullary op and just fetch the header mask with findHeaderMask when converting to a concrete recipe? That way we don't have to worry about the non-contiguous invariant.

Author (fhahn): Unfortunately I don't think that's possible at the moment: by the time we call convertToConcreteRecipes, the region may have been removed (or, if the LastActiveLane were the only user of the header mask, the mask may have been removed), so I've left it as-is for now.

Contributor: I'm also realising now that we need to model the use of the header mask, to prevent removeDeadRecipes from stripping it if, e.g., it's EVL tail-folded and all other uses of it are removed.

VPInstruction::FirstActiveLane, NotMasks,
LastActiveL->getDebugLoc(), "first.inactive.lane");

// Subtract 1 to get the last active lane.
VPValue *One = Plan.getOrAddLiveIn(
ConstantInt::get(Type::getInt64Ty(Plan.getContext()), 1));
VPValue *LastLane = Builder.createNaryOp(
Instruction::Sub, {FirstInactiveLane, One},
LastActiveL->getDebugLoc(), "last.active.lane");

LastActiveL->replaceAllUsesWith(LastLane);
ToRemove.push_back(LastActiveL);
continue;
}

VPValue *VectorStep;
VPValue *ScalarStep;
if (!match(&R, m_VPInstruction<VPInstruction::WideIVStep>(
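The expansion above relies on the prefix-mask property discussed in the review thread: for a mask without holes, LastActiveLane(M) equals FirstActiveLane(Not(M)) minus one. A small standalone check of that identity, and of how it breaks for a mask with holes (scalar model only; the real lowering goes through cttz_elts on the inverted mask):

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Scalar model of FirstActiveLane: index of the first set lane, or the vector
// length if none is set (mirroring cttz_elts with zero_is_poison = false).
static uint64_t firstActiveLane(const std::vector<bool> &Mask) {
  uint64_t I = 0;
  while (I < Mask.size() && !Mask[I])
    ++I;
  return I;
}

int main() {
  // Prefix mask (no holes): Not + FirstActiveLane + Sub yields the last active
  // lane, matching the expansion above.
  std::vector<bool> Mask = {true, true, true, false};
  std::vector<bool> NotMask = {false, false, false, true};
  assert(firstActiveLane(NotMask) - 1 == 2);

  // A mask with a hole shows why the verifier restricts operands to prefix
  // masks: the identity no longer holds (the last active lane is 3, not 0).
  std::vector<bool> Holey = {true, false, false, true};
  std::vector<bool> NotHoley = {false, true, true, false};
  assert(firstActiveLane(NotHoley) - 1 == 0);
  return 0;
}
```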
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -353,6 +353,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
VPValue *Op1;
if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
match(&R, m_LastActiveLane(m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
@@ -376,6 +377,12 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
m_VPValue(Op0)))) {
addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
if (isa<VPFirstOrderRecurrencePHIRecipe>(Op0)) {
assert(match(&R, m_ExtractLastElement(m_VPValue())) &&
"can only extract last element of FOR");
continue;
}

if (Plan.hasScalarVFOnly()) {
auto *I = cast<VPInstruction>(&R);
// Extracting from end with VF = 1 implies retrieving the last or
46 changes: 46 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -18,6 +18,7 @@
#include "VPlanDominatorTree.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanUtils.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/TypeSwitch.h"

@@ -44,6 +45,9 @@ class VPlanVerifier {
/// incoming value into EVL's recipe.
bool verifyEVLRecipe(const VPInstruction &EVL) const;

/// Verify that \p LastActiveLane's operand is guaranteed to be a prefix-mask.
bool verifyLastActiveLaneRecipe(const VPInstruction &LastActiveLane) const;

bool verifyVPBasicBlock(const VPBasicBlock *VPBB);

bool verifyBlock(const VPBlockBase *VPB);
@@ -221,6 +225,44 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
});
}

bool VPlanVerifier::verifyLastActiveLaneRecipe(
const VPInstruction &LastActiveLane) const {
assert(LastActiveLane.getOpcode() == VPInstruction::LastActiveLane &&
"must be called with VPInstruction::LastActiveLane");

if (LastActiveLane.getNumOperands() < 1) {
errs() << "LastActiveLane must have at least one operand\n";
return false;
}

const VPlan &Plan = *LastActiveLane.getParent()->getPlan();
// All operands must be prefix-mask. Currently we check for header masks or
// EVL-derived masks, as those are currently the only operands in practice,
// but this may need updating in the future..
Contributor: Suggested change:
-  // but this may need updating in the future..
+  // but this may need updating in the future.

Author (fhahn): Done, thanks.

for (VPValue *Op : LastActiveLane.operands()) {
if (vputils::isHeaderMask(Op, Plan))
continue;

// Masks derived from EVL are also fine.
auto BroadcastOrEVL =
m_CombineOr(m_Broadcast(m_VPValue()), m_EVL(m_VPValue()));
if (match(Op, m_ICmp(m_StepVector(), BroadcastOrEVL)) ||
match(Op, m_ICmp(BroadcastOrEVL, m_StepVector())))
continue;

errs() << "LastActiveLane operand ";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
VPSlotTracker Tracker(&Plan);
Op->printAsOperand(errs(), Tracker);
#endif
errs() << " must be prefix mask (a header mask or an "
"EVL-derived mask currently)\n";
return false;
}

return true;
}

bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
if (!verifyPhiRecipes(VPBB))
return false;
@@ -306,6 +348,10 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
return false;
}
break;
case VPInstruction::LastActiveLane:
if (!verifyLastActiveLaneRecipe(*VPI))
return false;
break;
default:
break;
}
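The EVL-derived masks accepted by the check above are prefix masks by construction: assuming the compare is an unsigned less-than of a step vector against the broadcast EVL, lane i is active exactly when i < EVL. A scalar sketch of that property (the compare direction is an assumption here, not taken from the patch):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar model of an EVL-derived mask, icmp ult (step-vector, broadcast(EVL)):
// lane I is active iff I < EVL, so active lanes always form a prefix.
static std::vector<bool> evlMask(uint64_t VF, uint64_t EVL) {
  std::vector<bool> Mask(VF);
  for (uint64_t I = 0; I < VF; ++I)
    Mask[I] = I < EVL;
  return Mask;
}

int main() {
  std::vector<bool> M = evlMask(/*VF=*/4, /*EVL=*/3);
  // Prefix property: once a lane is inactive, all following lanes are too.
  for (size_t I = 1; I < M.size(); ++I)
    assert(!(M[I] && !M[I - 1]) && "EVL mask must be a prefix mask");
  assert(M[0] && M[1] && M[2] && !M[3]);
  return 0;
}
```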