Skip to content

Commit 95fef1d

Browse files
authored
[LV] Improve AnyOf reduction codegen. (#78304)
Update AnyOf reduction code generation to only keep track of the AnyOf property in a boolean vector in the loop, only selecting either the new or start value in the middle block. The patch incorporates feedback from https://reviews.llvm.org/D153697. This fixes #62565, as there are no longer multiple uses of the start/new values. Fixes #62565. PR: #78304
1 parent a82ca39 commit 95fef1d

File tree

10 files changed

+279
-274
lines changed

10 files changed

+279
-274
lines changed

llvm/include/llvm/Transforms/Utils/LoopUtils.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -372,15 +372,6 @@ RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID);
372372
/// Returns the comparison predicate used when expanding a min/max reduction.
373373
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);
374374

375-
/// See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we
376-
/// are trying to match. In this pattern, we are only ever selecting between two
377-
/// values: 1) an initial start value \p StartVal of the reduction PHI, and 2) a
378-
/// loop invariant value. If any lane value in \p Left, \p Right is not equal
379-
/// to \p StartVal, select the loop invariant value. This is done by selecting
380-
/// \p Right iff \p Left is equal to \p StartVal.
381-
Value *createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK,
382-
Value *Left, Value *Right);
383-
384375
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
385376
/// The Builder's fast-math-flags must be set to propagate the expected values.
386377
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,15 +1034,6 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
10341034
}
10351035
}
10361036

1037-
Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal,
1038-
RecurKind RK, Value *Left, Value *Right) {
1039-
if (auto VTy = dyn_cast<VectorType>(Left->getType()))
1040-
StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
1041-
Value *Cmp =
1042-
Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
1043-
return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
1044-
}
1045-
10461037
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
10471038
Value *Right) {
10481039
Type *Ty = Left->getType();
@@ -1151,16 +1142,13 @@ Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,
11511142
NewVal = SI->getTrueValue();
11521143
}
11531144

1154-
// Create a splat vector with the new value and compare this to the vector
1155-
// we want to reduce.
1156-
ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
1157-
Value *Right = Builder.CreateVectorSplat(EC, InitVal);
1158-
Value *Cmp =
1159-
Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
1160-
11611145
// If any predicate is true it means that we want to select the new value.
1162-
Cmp = Builder.CreateOrReduce(Cmp);
1163-
return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
1146+
Value *AnyOf =
1147+
Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
1148+
// The compares in the loop may yield poison, which propagates through the
1149+
// bitwise ORs. Freeze it here before the condition is used.
1150+
AnyOf = Builder.CreateFreeze(AnyOf);
1151+
return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
11641152
}
11651153

11661154
Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ class VPBuilder {
6868
public:
6969
VPBuilder() = default;
7070
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
71+
VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
7172

7273
/// Clear the insertion point: created instructions will not be inserted into
7374
/// a block.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7405,7 +7405,6 @@ static void createAndCollectMergePhiForReduction(
74057405
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
74067406
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
74077407

7408-
TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
74097408
Value *FinalValue =
74107409
State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane()));
74117410
auto *ResumePhi =
@@ -7430,7 +7429,7 @@ static void createAndCollectMergePhiForReduction(
74307429
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
74317430
Incoming);
74327431
else
7433-
BCBlockPhi->addIncoming(ReductionStartValue, Incoming);
7432+
BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
74347433
}
74357434

74367435
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
@@ -8854,6 +8853,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
88548853
// A ComputeReductionResult recipe is added to the middle block, also for
88558854
// in-loop reductions which compute their result in-loop, because generating
88568855
// the subsequent bc.merge.rdx phi is driven by ComputeReductionResult recipes.
8856+
//
8857+
// Adjust AnyOf reductions; replace the reduction phi for the selected value
8858+
// with a boolean reduction phi node to check if the condition is true in any
8859+
// iteration. The final value is selected by the final ComputeReductionResult.
88578860
void LoopVectorizationPlanner::adjustRecipesForReductions(
88588861
VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
88598862
ElementCount MinVF) {
@@ -9027,6 +9030,41 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
90279030
continue;
90289031

90299032
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
9033+
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9034+
// with a boolean reduction phi node to check if the condition is true in
9035+
// any iteration. The final value is selected by the final
9036+
// ComputeReductionResult.
9037+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
9038+
RdxDesc.getRecurrenceKind())) {
9039+
auto *Select = cast<VPRecipeBase>(*find_if(PhiR->users(), [](VPUser *U) {
9040+
return isa<VPWidenSelectRecipe>(U) ||
9041+
(isa<VPReplicateRecipe>(U) &&
9042+
cast<VPReplicateRecipe>(U)->getUnderlyingInstr()->getOpcode() ==
9043+
Instruction::Select);
9044+
}));
9045+
VPValue *Cmp = Select->getOperand(0);
9046+
// If the compare is checking the reduction PHI node, adjust it to check
9047+
// the start value.
9048+
if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) {
9049+
for (unsigned I = 0; I != CmpR->getNumOperands(); ++I)
9050+
if (CmpR->getOperand(I) == PhiR)
9051+
CmpR->setOperand(I, PhiR->getStartValue());
9052+
}
9053+
VPBuilder::InsertPointGuard Guard(Builder);
9054+
Builder.setInsertPoint(Select);
9055+
9056+
// If the true value of the select is the reduction phi, the new value is
9057+
// selected if the negated condition is true in any iteration.
9058+
if (Select->getOperand(1) == PhiR)
9059+
Cmp = Builder.createNot(Cmp);
9060+
VPValue *Or = Builder.createOr(PhiR, Cmp);
9061+
Select->getVPSingleValue()->replaceAllUsesWith(Or);
9062+
9063+
// Convert the reduction phi to operate on bools.
9064+
PhiR->setOperand(0, Plan->getVPValueOrAddLiveIn(ConstantInt::getFalse(
9065+
OrigLoop->getHeader()->getContext())));
9066+
}
9067+
90309068
// If tail is folded by masking, introduce selects between the phi
90319069
// and the live-out instruction of each reduction, at the beginning of the
90329070
// dedicated latch block.
@@ -9059,7 +9097,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
90599097
// then extend the loop exit value to enable InstCombine to evaluate the
90609098
// entire expression in the smaller type.
90619099
Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType();
9062-
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
9100+
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
9101+
!RecurrenceDescriptor::isAnyOfRecurrenceKind(
9102+
RdxDesc.getRecurrenceKind())) {
90639103
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
90649104
Type *RdxTy = RdxDesc.getRecurrenceType();
90659105
auto *Trunc =

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,8 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
442442
// Reduce all of the unrolled parts into a single vector.
443443
Value *ReducedPartRdx = RdxParts[0];
444444
unsigned Op = RecurrenceDescriptor::getOpcode(RK);
445+
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
446+
Op = Instruction::Or;
445447

446448
if (PhiR->isOrdered()) {
447449
ReducedPartRdx = RdxParts[State.UF - 1];
@@ -454,19 +456,16 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
454456
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
455457
ReducedPartRdx = Builder.CreateBinOp(
456458
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
457-
else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
458-
TrackingVH<Value> ReductionStartValue =
459-
RdxDesc.getRecurrenceStartValue();
460-
ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
461-
ReducedPartRdx, RdxPart);
462-
} else
459+
else
463460
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
464461
}
465462
}
466463

467464
// Create the reduction after the loop. Note that inloop reductions create
468465
// the target reduction in the loop using a Reduction recipe.
469-
if (State.VF.isVector() && !PhiR->isInLoop()) {
466+
if ((State.VF.isVector() ||
467+
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
468+
!PhiR->isInLoop()) {
470469
ReducedPartRdx =
471470
createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
472471
// If the reduction can be performed in a smaller type, we need to extend

0 commit comments

Comments
 (0)