Skip to content

Commit c6e0162

Browse files
committed
Revert "Reapply "[LV] Improve AnyOf reduction codegen. (#78304)""
This reverts commit c6e38b9. Causes miscompiles, see comments on #78304.
1 parent 885b8d9 commit c6e0162

11 files changed

+904
-600
lines changed

llvm/include/llvm/Transforms/Utils/LoopUtils.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,15 @@ RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID);
372372
/// Returns the comparison predicate used when expanding a min/max reduction.
373373
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);
374374

375+
/// See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we
376+
/// are trying to match. In this pattern, we are only ever selecting between two
377+
/// values: 1) an initial start value \p StartVal of the reduction PHI, and 2) a
378+
/// loop invariant value. If any of lane value in \p Left, \p Right is not equal
379+
/// to \p StartVal, select the loop invariant value. This is done by selecting
380+
/// \p Right iff \p Left is equal to \p StartVal.
381+
Value *createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK,
382+
Value *Left, Value *Right);
383+
375384
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
376385
/// The Builder's fast-math-flags must be set to propagate the expected values.
377386
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,6 +1034,15 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
10341034
}
10351035
}
10361036

1037+
Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal,
1038+
RecurKind RK, Value *Left, Value *Right) {
1039+
if (auto VTy = dyn_cast<VectorType>(Left->getType()))
1040+
StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
1041+
Value *Cmp =
1042+
Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
1043+
return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
1044+
}
1045+
10371046
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
10381047
Value *Right) {
10391048
Type *Ty = Left->getType();
@@ -1142,13 +1151,16 @@ Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src,
11421151
NewVal = SI->getTrueValue();
11431152
}
11441153

1154+
// Create a splat vector with the new value and compare this to the vector
1155+
// we want to reduce.
1156+
ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
1157+
Value *Right = Builder.CreateVectorSplat(EC, InitVal);
1158+
Value *Cmp =
1159+
Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
1160+
11451161
// If any predicate is true it means that we want to select the new value.
1146-
Value *AnyOf =
1147-
Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src;
1148-
// The compares in the loop may yield poison, which propagates through the
1149-
// bitwise ORs. Freeze it here before the condition is used.
1150-
AnyOf = Builder.CreateFreeze(AnyOf);
1151-
return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select");
1162+
Cmp = Builder.CreateOrReduce(Cmp);
1163+
return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
11521164
}
11531165

11541166
Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src,

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,9 @@ class VPBuilder {
6868
public:
6969
VPBuilder() = default;
7070
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
71-
VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
71+
VPBuilder(VPRecipeBase *InsertPt) {
72+
setInsertPoint(InsertPt->getParent(), InsertPt->getIterator());
73+
}
7274

7375
/// Clear the insertion point: created instructions will not be inserted into
7476
/// a block.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 14 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -3051,8 +3051,9 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
30513051
}
30523052

30533053
// Create phi nodes to merge from the backedge-taken check block.
3054-
PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
3055-
LoopScalarPreHeader->getFirstNonPHI());
3054+
PHINode *BCResumeVal =
3055+
PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
3056+
LoopScalarPreHeader->getTerminator()->getIterator());
30563057
// Copy original phi DL over to the new one.
30573058
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
30583059

@@ -7450,6 +7451,7 @@ static void createAndCollectMergePhiForReduction(
74507451
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
74517452
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
74527453

7454+
TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
74537455
Value *FinalValue =
74547456
State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane()));
74557457
auto *ResumePhi =
@@ -7474,7 +7476,7 @@ static void createAndCollectMergePhiForReduction(
74747476
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
74757477
Incoming);
74767478
else
7477-
BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
7479+
BCBlockPhi->addIncoming(ReductionStartValue, Incoming);
74787480
}
74797481

74807482
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
@@ -7767,10 +7769,11 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
77677769

77687770
// Now, compare the remaining count and if there aren't enough iterations to
77697771
// execute the vectorized epilogue skip to the scalar part.
7770-
LoopVectorPreHeader->setName("vec.epilog.ph");
7771-
BasicBlock *VecEpilogueIterationCountCheck =
7772-
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->begin(), DT, LI,
7773-
nullptr, "vec.epilog.iter.check", true);
7772+
BasicBlock *VecEpilogueIterationCountCheck = LoopVectorPreHeader;
7773+
VecEpilogueIterationCountCheck->setName("vec.epilog.iter.check");
7774+
LoopVectorPreHeader =
7775+
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
7776+
LI, nullptr, "vec.epilog.ph");
77747777
emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
77757778
VecEpilogueIterationCountCheck);
77767779

@@ -8893,10 +8896,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
88938896
// A ComputeReductionResult recipe is added to the middle block, also for
88948897
// in-loop reductions which compute their result in-loop, because generating
88958898
// the subsequent bc.merge.rdx phi is driven by ComputeReductionResult recipes.
8896-
//
8897-
// Adjust AnyOf reductions; replace the reduction phi for the selected value
8898-
// with a boolean reduction phi node to check if the condition is true in any
8899-
// iteration. The final value is selected by the final ComputeReductionResult.
89008899
void LoopVectorizationPlanner::adjustRecipesForReductions(
89018900
VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
89028901
ElementCount MinVF) {
@@ -9071,41 +9070,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
90719070
continue;
90729071

90739072
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
9074-
// Adjust AnyOf reductions; replace the reduction phi for the selected value
9075-
// with a boolean reduction phi node to check if the condition is true in
9076-
// any iteration. The final value is selected by the final
9077-
// ComputeReductionResult.
9078-
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
9079-
RdxDesc.getRecurrenceKind())) {
9080-
auto *Select = cast<VPRecipeBase>(*find_if(PhiR->users(), [](VPUser *U) {
9081-
return isa<VPWidenSelectRecipe>(U) ||
9082-
(isa<VPReplicateRecipe>(U) &&
9083-
cast<VPReplicateRecipe>(U)->getUnderlyingInstr()->getOpcode() ==
9084-
Instruction::Select);
9085-
}));
9086-
VPValue *Cmp = Select->getOperand(0);
9087-
// If the compare is checking the reduction PHI node, adjust it to check
9088-
// the start value.
9089-
if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) {
9090-
for (unsigned I = 0; I != CmpR->getNumOperands(); ++I)
9091-
if (CmpR->getOperand(I) == PhiR)
9092-
CmpR->setOperand(I, PhiR->getStartValue());
9093-
}
9094-
VPBuilder::InsertPointGuard Guard(Builder);
9095-
Builder.setInsertPoint(Select);
9096-
9097-
// If the true value of the select is the reduction phi, the new value is
9098-
// selected if the negated condition is true in any iteration.
9099-
if (Select->getOperand(1) == PhiR)
9100-
Cmp = Builder.createNot(Cmp);
9101-
VPValue *Or = Builder.createOr(PhiR, Cmp);
9102-
Select->getVPSingleValue()->replaceAllUsesWith(Or);
9103-
9104-
// Convert the reduction phi to operate on bools.
9105-
PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
9106-
OrigLoop->getHeader()->getContext())));
9107-
}
9108-
91099073
// If tail is folded by masking, introduce selects between the phi
91109074
// and the live-out instruction of each reduction, at the beginning of the
91119075
// dedicated latch block.
@@ -9138,9 +9102,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
91389102
// then extend the loop exit value to enable InstCombine to evaluate the
91399103
// entire expression in the smaller type.
91409104
Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType();
9141-
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() &&
9142-
!RecurrenceDescriptor::isAnyOfRecurrenceKind(
9143-
RdxDesc.getRecurrenceKind())) {
9105+
if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
91449106
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
91459107
Type *RdxTy = RdxDesc.getRecurrenceType();
91469108
auto *Trunc =
@@ -10181,19 +10143,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1018110143
Value *ResumeV = nullptr;
1018210144
// TODO: Move setting of resume values to prepareToExecute.
1018310145
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
10184-
const RecurrenceDescriptor &RdxDesc =
10185-
ReductionPhi->getRecurrenceDescriptor();
10186-
RecurKind RK = RdxDesc.getRecurrenceKind();
10187-
ResumeV = ReductionResumeValues.find(&RdxDesc)->second;
10188-
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
10189-
// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
10190-
// start value; compare the final value from the main vector loop
10191-
// to the start value.
10192-
IRBuilder<> Builder(
10193-
cast<Instruction>(ResumeV)->getParent()->getFirstNonPHI());
10194-
ResumeV = Builder.CreateICmpNE(ResumeV,
10195-
RdxDesc.getRecurrenceStartValue());
10196-
}
10146+
ResumeV = ReductionResumeValues
10147+
.find(&ReductionPhi->getRecurrenceDescriptor())
10148+
->second;
1019710149
} else {
1019810150
// Create induction resume values for both widened pointer and
1019910151
// integer/fp inductions and update the start value of the induction

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -501,8 +501,6 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
501501
// Reduce all of the unrolled parts into a single vector.
502502
Value *ReducedPartRdx = RdxParts[0];
503503
unsigned Op = RecurrenceDescriptor::getOpcode(RK);
504-
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
505-
Op = Instruction::Or;
506504

507505
if (PhiR->isOrdered()) {
508506
ReducedPartRdx = RdxParts[State.UF - 1];
@@ -515,16 +513,19 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
515513
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
516514
ReducedPartRdx = Builder.CreateBinOp(
517515
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
518-
else
516+
else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
517+
TrackingVH<Value> ReductionStartValue =
518+
RdxDesc.getRecurrenceStartValue();
519+
ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
520+
ReducedPartRdx, RdxPart);
521+
} else
519522
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
520523
}
521524
}
522525

523526
// Create the reduction after the loop. Note that inloop reductions create
524527
// the target reduction in the loop using a Reduction recipe.
525-
if ((State.VF.isVector() ||
526-
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
527-
!PhiR->isInLoop()) {
528+
if (State.VF.isVector() && !PhiR->isInLoop()) {
528529
ReducedPartRdx =
529530
createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
530531
// If the reduction can be performed in a smaller type, we need to extend

0 commit comments

Comments
 (0)