Skip to content

Commit bccb7ed

Browse files
committed
Reapply "[LV] Improve AnyOf reduction codegen. (#78304)"
This reverts the revert commit c6e0162. This patch includes a fix for any-of reductions and epilogue vectorization. Extra test coverage for the issue that caused the revert has been added in bce3bfc and an assertion has been added in c7209cb. -------------------------------- Original commit message: Update AnyOf reduction code generation to only keep track of the AnyOf property in a boolean vector in the loop, only selecting either the new or start value in the middle block. The patch incorporates feedback from https://reviews.llvm.org/D153697. This fixes #62565, as now there aren't multiple uses of the start/new values. Fixes #62565 PR: #78304
1 parent a13c514 commit bccb7ed

11 files changed

+619
-913
lines changed

llvm/include/llvm/Transforms/Utils/LoopUtils.h

-9
Original file line numberDiff line numberDiff line change
@@ -372,15 +372,6 @@ RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID);
372372
/// Returns the comparison predicate used when expanding a min/max reduction.
373373
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);
374374

375-
/// See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we
376-
/// are trying to match. In this pattern, we are only ever selecting between two
377-
/// values: 1) an initial start value \p StartVal of the reduction PHI, and 2) a
378-
/// loop invariant value. If any of lane value in \p Left, \p Right is not equal
379-
/// to \p StartVal, select the loop invariant value. This is done by selecting
380-
/// \p Right iff \p Left is equal to \p StartVal.
381-
Value *createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK,
382-
Value *Left, Value *Right);
383-
384375
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
385376
/// The Builder's fast-math-flags must be set to propagate the expected values.
386377
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,

llvm/lib/Transforms/Utils/LoopUtils.cpp

+6-18
Original file line numberDiff line numberDiff line change
@@ -1034,15 +1034,6 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
10341034
}
10351035
}
10361036

1037-
Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal,
1038-
RecurKind RK, Value *Left, Value *Right) {
1039-
if (auto VTy = dyn_cast<VectorType>(Left->getType()))
1040-
StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
1041-
Value *Cmp =
1042-
Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
1043-
return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
1044-
}
1045-
10461037
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
10471038
Value *Right) {
10481039
Type *Ty = Left->getType();
@@ -1151,16 +1142,13 @@ Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,
11511142
NewVal = SI->getTrueValue();
11521143
}
11531144

1154-
// Create a splat vector with the new value and compare this to the vector
1155-
// we want to reduce.
1156-
ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
1157-
Value *Right = Builder.CreateVectorSplat(EC, InitVal);
1158-
Value *Cmp =
1159-
Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
1160-
11611145
// If any predicate is true it means that we want to select the new value.
1162-
Cmp = Builder.CreateOrReduce(Cmp);
1163-
return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
1146+
Value *AnyOf =
1147+
Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
1148+
// The compares in the loop may yield poison, which propagates through the
1149+
// bitwise ORs. Freeze it here before the condition is used.
1150+
AnyOf = Builder.CreateFreeze(AnyOf);
1151+
return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
11641152
}
11651153

11661154
Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

+1-3
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,7 @@ class VPBuilder {
6868
public:
6969
VPBuilder() = default;
7070
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
71-
VPBuilder(VPRecipeBase *InsertPt) {
72-
setInsertPoint(InsertPt->getParent(), InsertPt->getIterator());
73-
}
71+
VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
7472

7573
/// Clear the insertion point: created instructions will not be inserted into
7674
/// a block.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+69-14
Original file line numberDiff line numberDiff line change
@@ -3036,9 +3036,8 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
30363036
}
30373037

30383038
// Create phi nodes to merge from the backedge-taken check block.
3039-
PHINode *BCResumeVal =
3040-
PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
3041-
LoopScalarPreHeader->getTerminator()->getIterator());
3039+
PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
3040+
LoopScalarPreHeader->getFirstNonPHI());
30423041
// Copy original phi DL over to the new one.
30433042
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
30443043

@@ -7453,11 +7452,17 @@ static void createAndCollectMergePhiForReduction(
74537452
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
74547453
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
74557454

7456-
TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
74577455
Value *FinalValue =
74587456
State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane()));
74597457
auto *ResumePhi =
74607458
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
7459+
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
7460+
RdxDesc.getRecurrenceKind())) {
7461+
auto *Cmp = cast<ICmpInst>(PhiR->getStartValue()->getUnderlyingValue());
7462+
assert(Cmp->getPredicate() == CmpInst::ICMP_NE);
7463+
assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue());
7464+
ResumePhi = cast<PHINode>(Cmp->getOperand(0));
7465+
}
74617466
assert((!VectorizingEpilogue || ResumePhi) &&
74627467
"when vectorizing the epilogue loop, we need a resume phi from main "
74637468
"vector loop");
@@ -7481,7 +7486,7 @@ static void createAndCollectMergePhiForReduction(
74817486
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
74827487
Incoming);
74837488
else
7484-
BCBlockPhi->addIncoming(ReductionStartValue, Incoming);
7489+
BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
74857490
}
74867491

74877492
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
@@ -7774,11 +7779,10 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
77747779

77757780
// Now, compare the remaining count and if there aren't enough iterations to
77767781
// execute the vectorized epilogue skip to the scalar part.
7777-
BasicBlock *VecEpilogueIterationCountCheck = LoopVectorPreHeader;
7778-
VecEpilogueIterationCountCheck->setName("vec.epilog.iter.check");
7779-
LoopVectorPreHeader =
7780-
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
7781-
LI, nullptr, "vec.epilog.ph");
7782+
LoopVectorPreHeader->setName("vec.epilog.ph");
7783+
BasicBlock *VecEpilogueIterationCountCheck =
7784+
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->begin(), DT, LI,
7785+
nullptr, "vec.epilog.iter.check", true);
77827786
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
77837787
VecEpilogueIterationCountCheck);
77847788

@@ -8913,6 +8917,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
89138917
// A ComputeReductionResult recipe is added to the middle block, also for
89148918
// in-loop reductions which compute their result in-loop, because generating
89158919
// the subsequent bc.merge.rdx phi is driven by ComputeReductionResult recipes.
8920+
//
8921+
// Adjust AnyOf reductions; replace the reduction phi for the selected value
8922+
// with a boolean reduction phi node to check if the condition is true in any
8923+
// iteration. The final value is selected by the final ComputeReductionResult.
89168924
void LoopVectorizationPlanner::adjustRecipesForReductions(
89178925
VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
89188926
ElementCount MinVF) {
@@ -9087,6 +9095,41 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
90879095
continue;
90889096

90899097
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
9098+
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9099+
// with a boolean reduction phi node to check if the condition is true in
9100+
// any iteration. The final value is selected by the final
9101+
// ComputeReductionResult.
9102+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
9103+
RdxDesc.getRecurrenceKind())) {
9104+
auto *Select = cast<VPRecipeBase>(*find_if(PhiR->users(), [](VPUser *U) {
9105+
return isa<VPWidenSelectRecipe>(U) ||
9106+
(isa<VPReplicateRecipe>(U) &&
9107+
cast<VPReplicateRecipe>(U)->getUnderlyingInstr()->getOpcode() ==
9108+
Instruction::Select);
9109+
}));
9110+
VPValue *Cmp = Select->getOperand(0);
9111+
// If the compare is checking the reduction PHI node, adjust it to check
9112+
// the start value.
9113+
if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) {
9114+
for (unsigned I = 0; I != CmpR->getNumOperands(); ++I)
9115+
if (CmpR->getOperand(I) == PhiR)
9116+
CmpR->setOperand(I, PhiR->getStartValue());
9117+
}
9118+
VPBuilder::InsertPointGuard Guard(Builder);
9119+
Builder.setInsertPoint(Select);
9120+
9121+
// If the true value of the select is the reduction phi, the new value is
9122+
// selected if the negated condition is true in any iteration.
9123+
if (Select->getOperand(1) == PhiR)
9124+
Cmp = Builder.createNot(Cmp);
9125+
VPValue *Or = Builder.createOr(PhiR, Cmp);
9126+
Select->getVPSingleValue()->replaceAllUsesWith(Or);
9127+
9128+
// Convert the reduction phi to operate on bools.
9129+
PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
9130+
OrigLoop->getHeader()->getContext())));
9131+
}
9132+
90909133
// If tail is folded by masking, introduce selects between the phi
90919134
// and the live-out instruction of each reduction, at the beginning of the
90929135
// dedicated latch block.
@@ -9119,7 +9162,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
91199162
// then extend the loop exit value to enable InstCombine to evaluate the
91209163
// entire expression in the smaller type.
91219164
Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType();
9122-
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
9165+
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
9166+
!RecurrenceDescriptor::isAnyOfRecurrenceKind(
9167+
RdxDesc.getRecurrenceKind())) {
91239168
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
91249169
Type *RdxTy = RdxDesc.getRecurrenceType();
91259170
auto *Trunc =
@@ -10164,9 +10209,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1016410209
Value *ResumeV = nullptr;
1016510210
// TODO: Move setting of resume values to prepareToExecute.
1016610211
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
10167-
ResumeV = ReductionResumeValues
10168-
.find(&ReductionPhi->getRecurrenceDescriptor())
10169-
->second;
10212+
const RecurrenceDescriptor &RdxDesc =
10213+
ReductionPhi->getRecurrenceDescriptor();
10214+
RecurKind RK = RdxDesc.getRecurrenceKind();
10215+
ResumeV = ReductionResumeValues.find(&RdxDesc)->second;
10216+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
10217+
// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
10218+
// start value; compare the final value from the main vector loop
10219+
// to the start value.
10220+
IRBuilder<> Builder(
10221+
cast<Instruction>(ResumeV)->getParent()->getFirstNonPHI());
10222+
ResumeV = Builder.CreateICmpNE(ResumeV,
10223+
RdxDesc.getRecurrenceStartValue());
10224+
}
1017010225
} else {
1017110226
// Create induction resume values for both widened pointer and
1017210227
// integer/fp inductions and update the start value of the induction

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

+6-7
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,8 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
513513
// Reduce all of the unrolled parts into a single vector.
514514
Value *ReducedPartRdx = RdxParts[0];
515515
unsigned Op = RecurrenceDescriptor::getOpcode(RK);
516+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
517+
Op = Instruction::Or;
516518

517519
if (PhiR->isOrdered()) {
518520
ReducedPartRdx = RdxParts[State.UF - 1];
@@ -525,19 +527,16 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
525527
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
526528
ReducedPartRdx = Builder.CreateBinOp(
527529
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
528-
else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
529-
TrackingVH<Value> ReductionStartValue =
530-
RdxDesc.getRecurrenceStartValue();
531-
ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
532-
ReducedPartRdx, RdxPart);
533-
} else
530+
else
534531
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
535532
}
536533
}
537534

538535
// Create the reduction after the loop. Note that inloop reductions create
539536
// the target reduction in the loop using a Reduction recipe.
540-
if (State.VF.isVector() && !PhiR->isInLoop()) {
537+
if ((State.VF.isVector() ||
538+
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
539+
!PhiR->isInLoop()) {
541540
ReducedPartRdx =
542541
createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
543542
// If the reduction can be performed in a smaller type, we need to extend

0 commit comments

Comments
 (0)