Skip to content

Commit c6e38b9

Browse files
committed
Reapply "[LV] Improve AnyOf reduction codegen. (#78304)"
This reverts the revert commit 589c7ab. This patch includes a fix for any-of reductions and epilogue vectorization. Extra test coverage for the issue that caused the revert has been added in 399ff08. -------------------------------- Original commit message: Update AnyOf reduction code generation to only keep track of the AnyOf property in a boolean vector in the loop, only selecting either the new or start value in the middle block. The patch incorporates feedback from https://reviews.llvm.org/D153697. This fixes #62565, as there are no longer multiple uses of the start/new values. Fixes llvm/llvm-project#62565 PR: llvm/llvm-project#78304
1 parent 2650375 commit c6e38b9

11 files changed

+425
-310
lines changed

llvm/include/llvm/Transforms/Utils/LoopUtils.h

-9
Original file line numberDiff line numberDiff line change
@@ -372,15 +372,6 @@ RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID);
372372
/// Returns the comparison predicate used when expanding a min/max reduction.
373373
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);
374374

375-
/// See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we
376-
/// are trying to match. In this pattern, we are only ever selecting between two
377-
/// values: 1) an initial start value \p StartVal of the reduction PHI, and 2) a
378-
/// loop invariant value. If any lane value in \p Left, \p Right is not equal
379-
/// to \p StartVal, select the loop invariant value. This is done by selecting
380-
/// \p Right iff \p Left is equal to \p StartVal.
381-
Value *createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK,
382-
Value *Left, Value *Right);
383-
384375
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
385376
/// The Builder's fast-math-flags must be set to propagate the expected values.
386377
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,

llvm/lib/Transforms/Utils/LoopUtils.cpp

+6-18
Original file line numberDiff line numberDiff line change
@@ -1034,15 +1034,6 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
10341034
}
10351035
}
10361036

1037-
Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal,
1038-
RecurKind RK, Value *Left, Value *Right) {
1039-
if (auto VTy = dyn_cast<VectorType>(Left->getType()))
1040-
StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
1041-
Value *Cmp =
1042-
Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
1043-
return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
1044-
}
1045-
10461037
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
10471038
Value *Right) {
10481039
Type *Ty = Left->getType();
@@ -1151,16 +1142,13 @@ Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,
11511142
NewVal = SI->getTrueValue();
11521143
}
11531144

1154-
// Create a splat vector with the new value and compare this to the vector
1155-
// we want to reduce.
1156-
ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
1157-
Value *Right = Builder.CreateVectorSplat(EC, InitVal);
1158-
Value *Cmp =
1159-
Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
1160-
11611145
// If any predicate is true it means that we want to select the new value.
1162-
Cmp = Builder.CreateOrReduce(Cmp);
1163-
return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
1146+
Value *AnyOf =
1147+
Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
1148+
// The compares in the loop may yield poison, which propagates through the
1149+
// bitwise ORs. Freeze it here before the condition is used.
1150+
AnyOf = Builder.CreateFreeze(AnyOf);
1151+
return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
11641152
}
11651153

11661154
Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

+1-3
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,7 @@ class VPBuilder {
6868
public:
6969
VPBuilder() = default;
7070
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
71-
VPBuilder(VPRecipeBase *InsertPt) {
72-
setInsertPoint(InsertPt->getParent(), InsertPt->getIterator());
73-
}
71+
VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
7472

7573
/// Clear the insertion point: created instructions will not be inserted into
7674
/// a block.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+62-14
Original file line numberDiff line numberDiff line change
@@ -3055,9 +3055,8 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
30553055
}
30563056

30573057
// Create phi nodes to merge from the backedge-taken check block.
3058-
PHINode *BCResumeVal =
3059-
PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
3060-
LoopScalarPreHeader->getTerminator()->getIterator());
3058+
PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
3059+
LoopScalarPreHeader->getFirstNonPHI());
30613060
// Copy original phi DL over to the new one.
30623061
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
30633062

@@ -7460,7 +7459,6 @@ static void createAndCollectMergePhiForReduction(
74607459
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
74617460
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
74627461

7463-
TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
74647462
Value *FinalValue =
74657463
State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane()));
74667464
auto *ResumePhi =
@@ -7485,7 +7483,7 @@ static void createAndCollectMergePhiForReduction(
74857483
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
74867484
Incoming);
74877485
else
7488-
BCBlockPhi->addIncoming(ReductionStartValue, Incoming);
7486+
BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
74897487
}
74907488

74917489
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
@@ -7778,11 +7776,10 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
77787776

77797777
// Now, compare the remaining count and if there aren't enough iterations to
77807778
// execute the vectorized epilogue skip to the scalar part.
7781-
BasicBlock *VecEpilogueIterationCountCheck = LoopVectorPreHeader;
7782-
VecEpilogueIterationCountCheck->setName("vec.epilog.iter.check");
7783-
LoopVectorPreHeader =
7784-
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
7785-
LI, nullptr, "vec.epilog.ph");
7779+
LoopVectorPreHeader->setName("vec.epilog.ph");
7780+
BasicBlock *VecEpilogueIterationCountCheck =
7781+
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->begin(), DT, LI,
7782+
nullptr, "vec.epilog.iter.check", true);
77867783
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
77877784
VecEpilogueIterationCountCheck);
77887785

@@ -8901,6 +8898,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
89018898
// A ComputeReductionResult recipe is added to the middle block, also for
89028899
// in-loop reductions which compute their result in-loop, because generating
89038900
// the subsequent bc.merge.rdx phi is driven by ComputeReductionResult recipes.
8901+
//
8902+
// Adjust AnyOf reductions; replace the reduction phi for the selected value
8903+
// with a boolean reduction phi node to check if the condition is true in any
8904+
// iteration. The final value is selected by the final ComputeReductionResult.
89048905
void LoopVectorizationPlanner::adjustRecipesForReductions(
89058906
VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
89068907
ElementCount MinVF) {
@@ -9074,6 +9075,41 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
90749075
continue;
90759076

90769077
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
9078+
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9079+
// with a boolean reduction phi node to check if the condition is true in
9080+
// any iteration. The final value is selected by the final
9081+
// ComputeReductionResult.
9082+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
9083+
RdxDesc.getRecurrenceKind())) {
9084+
auto *Select = cast<VPRecipeBase>(*find_if(PhiR->users(), [](VPUser *U) {
9085+
return isa<VPWidenSelectRecipe>(U) ||
9086+
(isa<VPReplicateRecipe>(U) &&
9087+
cast<VPReplicateRecipe>(U)->getUnderlyingInstr()->getOpcode() ==
9088+
Instruction::Select);
9089+
}));
9090+
VPValue *Cmp = Select->getOperand(0);
9091+
// If the compare is checking the reduction PHI node, adjust it to check
9092+
// the start value.
9093+
if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) {
9094+
for (unsigned I = 0; I != CmpR->getNumOperands(); ++I)
9095+
if (CmpR->getOperand(I) == PhiR)
9096+
CmpR->setOperand(I, PhiR->getStartValue());
9097+
}
9098+
VPBuilder::InsertPointGuard Guard(Builder);
9099+
Builder.setInsertPoint(Select);
9100+
9101+
// If the true value of the select is the reduction phi, the new value is
9102+
// selected if the negated condition is true in any iteration.
9103+
if (Select->getOperand(1) == PhiR)
9104+
Cmp = Builder.createNot(Cmp);
9105+
VPValue *Or = Builder.createOr(PhiR, Cmp);
9106+
Select->getVPSingleValue()->replaceAllUsesWith(Or);
9107+
9108+
// Convert the reduction phi to operate on bools.
9109+
PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
9110+
OrigLoop->getHeader()->getContext())));
9111+
}
9112+
90779113
// If tail is folded by masking, introduce selects between the phi
90789114
// and the live-out instruction of each reduction, at the beginning of the
90799115
// dedicated latch block.
@@ -9106,7 +9142,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
91069142
// then extend the loop exit value to enable InstCombine to evaluate the
91079143
// entire expression in the smaller type.
91089144
Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType();
9109-
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
9145+
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
9146+
!RecurrenceDescriptor::isAnyOfRecurrenceKind(
9147+
RdxDesc.getRecurrenceKind())) {
91109148
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
91119149
Type *RdxTy = RdxDesc.getRecurrenceType();
91129150
auto *Trunc =
@@ -10198,9 +10236,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1019810236
Value *ResumeV = nullptr;
1019910237
// TODO: Move setting of resume values to prepareToExecute.
1020010238
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
10201-
ResumeV = ReductionResumeValues
10202-
.find(&ReductionPhi->getRecurrenceDescriptor())
10203-
->second;
10239+
const RecurrenceDescriptor &RdxDesc =
10240+
ReductionPhi->getRecurrenceDescriptor();
10241+
RecurKind RK = RdxDesc.getRecurrenceKind();
10242+
ResumeV = ReductionResumeValues.find(&RdxDesc)->second;
10243+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
10244+
// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
10245+
// start value; compare the final value from the main vector loop
10246+
// to the start value.
10247+
IRBuilder<> Builder(
10248+
cast<Instruction>(ResumeV)->getParent()->getFirstNonPHI());
10249+
ResumeV = Builder.CreateICmpNE(ResumeV,
10250+
RdxDesc.getRecurrenceStartValue());
10251+
}
1020410252
} else {
1020510253
// Create induction resume values for both widened pointer and
1020610254
// integer/fp inductions and update the start value of the induction

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

+6-7
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,8 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
501501
// Reduce all of the unrolled parts into a single vector.
502502
Value *ReducedPartRdx = RdxParts[0];
503503
unsigned Op = RecurrenceDescriptor::getOpcode(RK);
504+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
505+
Op = Instruction::Or;
504506

505507
if (PhiR->isOrdered()) {
506508
ReducedPartRdx = RdxParts[State.UF - 1];
@@ -513,19 +515,16 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
513515
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
514516
ReducedPartRdx = Builder.CreateBinOp(
515517
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
516-
else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
517-
TrackingVH<Value> ReductionStartValue =
518-
RdxDesc.getRecurrenceStartValue();
519-
ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
520-
ReducedPartRdx, RdxPart);
521-
} else
518+
else
522519
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
523520
}
524521
}
525522

526523
// Create the reduction after the loop. Note that inloop reductions create
527524
// the target reduction in the loop using a Reduction recipe.
528-
if (State.VF.isVector() && !PhiR->isInLoop()) {
525+
if ((State.VF.isVector() ||
526+
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
527+
!PhiR->isInLoop()) {
529528
ReducedPartRdx =
530529
createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
531530
// If the reduction can be performed in a smaller type, we need to extend

0 commit comments

Comments
 (0)