Skip to content

Commit ac146a7

Browse files
committed
[LV] Use VPReductionRecipe for partial reductions
Partial reductions can easily be represented by the VPReductionRecipe class by setting their scale factor to something greater than 1. This PR merges the two together and gives VPReductionRecipe a VFScaleFactor so that it can choose to generate the partial reduction intrinsic at execute time. Depends on #144281
1 parent 334d438 commit ac146a7

File tree

12 files changed

+556
-989
lines changed

12 files changed

+556
-989
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6954,7 +6954,8 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
69546954
}
69556955
// The VPlan-based cost model is more accurate for partial reduction and
69566956
// comparing against the legacy cost isn't desirable.
6957-
if (isa<VPPartialReductionRecipe>(&R))
6957+
if (auto *VPR = dyn_cast<VPReductionRecipe>(&R);
6958+
VPR && VPR->isPartialReduction())
69586959
return true;
69596960

69606961
// The VPlan-based cost model can analyze if recipes are scalar
@@ -8075,11 +8076,21 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
80758076
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
80768077

80778078
// If the PHI is used by a partial reduction, set the scale factor.
8079+
bool UseInLoopReduction = CM.isInLoopReduction(Phi);
8080+
bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc);
80788081
unsigned ScaleFactor =
80798082
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
8080-
PhiRecipe = new VPReductionPHIRecipe(
8081-
Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
8082-
CM.useOrderedReductions(RdxDesc), ScaleFactor);
8083+
RdxStyle Style(RdxNormal{});
8084+
if (UseInLoopReduction) {
8085+
if (UseOrderedReductions)
8086+
Style = RdxOrderedInLoop{};
8087+
else
8088+
Style = RdxInLoop{};
8089+
} else if (ScaleFactor > 1) {
8090+
Style = RdxPartial{/*VFScaleFactor=*/ScaleFactor};
8091+
}
8092+
PhiRecipe = new VPReductionPHIRecipe(Phi, RdxDesc.getRecurrenceKind(),
8093+
*StartV, Style);
80838094
} else {
80848095
// TODO: Currently fixed-order recurrences are modeled as chains of
80858096
// first-order recurrences. If there are no users of the intermediate
@@ -8147,7 +8158,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
81478158
VPValue *Accumulator = Operands[1];
81488159
VPRecipeBase *BinOpRecipe = BinOp->getDefiningRecipe();
81498160
if (isa<VPReductionPHIRecipe>(BinOpRecipe) ||
8150-
isa<VPPartialReductionRecipe>(BinOpRecipe))
8161+
(isa<VPReductionRecipe>(BinOpRecipe) &&
8162+
cast<VPReductionRecipe>(BinOpRecipe)->isPartialReduction()))
81518163
std::swap(BinOp, Accumulator);
81528164

81538165
if (ScaleFactor !=
@@ -8172,12 +8184,11 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
81728184
"Expected an ADD or SUB operation for predicated partial "
81738185
"reductions (because the neutral element in the mask is zero)!");
81748186
Cond = getBlockInMask(Builder.getInsertBlock());
8175-
VPValue *Zero =
8176-
Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
8177-
BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
81788187
}
8179-
return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond,
8180-
ScaleFactor, Reduction);
8188+
8189+
return new VPReductionRecipe(RecurKind::Add, FastMathFlags(), Reduction,
8190+
Accumulator, BinOp, Cond,
8191+
RdxPartial{/*VFScaleFactor=*/ScaleFactor});
81818192
}
81828193

81838194
void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
@@ -8687,9 +8698,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
86878698
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
86888699
? RdxDesc.getFastMathFlags()
86898700
: FastMathFlags();
8690-
auto *RedRecipe = new VPReductionRecipe(
8691-
Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
8692-
PhiR->isOrdered(), CurrentLinkI->getDebugLoc());
8701+
bool UseOrderedReductions = PhiR->isOrdered();
8702+
RdxStyle Style = UseOrderedReductions ? RdxStyle(RdxOrderedInLoop{})
8703+
: RdxStyle(RdxInLoop{});
8704+
auto *RedRecipe =
8705+
new VPReductionRecipe(Kind, FMFs, CurrentLinkI, PreviousLink, VecOp,
8706+
CondOp, Style, CurrentLinkI->getDebugLoc());
86938707
// Append the recipe to the end of the VPBasicBlock because we need to
86948708
// ensure that it comes after all of its inputs, including CondOp.
86958709
// Delete CurrentLink as it will be invalid if its operand is replaced
@@ -8724,8 +8738,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
87248738
// Don't output selects for partial reductions because they have an output
87258739
// with fewer lanes than the VF. So the operands of the select would have
87268740
// different numbers of lanes. Partial reductions mask the input instead.
8741+
auto *RR = dyn_cast<VPReductionRecipe>(OrigExitingVPV->getDefiningRecipe());
87278742
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
8728-
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
8743+
(!RR || !RR->isPartialReduction())) {
87298744
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent());
87308745
std::optional<FastMathFlags> FMFs =
87318746
PhiTy->isFloatingPointTy()

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 71 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include <functional>
4444
#include <string>
4545
#include <utility>
46+
#include <variant>
4647

4748
namespace llvm {
4849

@@ -553,7 +554,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
553554
case VPRecipeBase::VPWidenIntOrFpInductionSC:
554555
case VPRecipeBase::VPWidenPointerInductionSC:
555556
case VPRecipeBase::VPReductionPHISC:
556-
case VPRecipeBase::VPPartialReductionSC:
557557
return true;
558558
case VPRecipeBase::VPBranchOnMaskSC:
559559
case VPRecipeBase::VPInterleaveEVLSC:
@@ -2327,6 +2327,23 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
23272327
}
23282328
};
23292329

2330+
// Possible variants of a reduction.
2331+
2332+
// This reduction is ordered and in-loop.
2333+
struct RdxOrderedInLoop {};
2334+
// This reduction is in-loop.
2335+
struct RdxInLoop {};
2336+
// This reduction isn't partial, ordered or in-loop.
2337+
struct RdxNormal {};
2338+
// This reduction is partial: its output is a vector whose length is the VF
2339+
// divided by VFScaleFactor.
2340+
struct RdxPartial {
2341+
// The factor by which the output is scaled down from the VF.
2342+
unsigned VFScaleFactor;
2343+
};
2344+
// The style of a reduction: exactly one of the variants declared above.
using RdxStyle =
    std::variant<RdxOrderedInLoop, RdxInLoop, RdxNormal, RdxPartial>;
2346+
23302347
/// A recipe for handling reduction phis. The start value is the first operand
23312348
/// of the recipe and the incoming value from the backedge is the second
23322349
/// operand.
@@ -2335,32 +2352,21 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
23352352
/// The recurrence kind of the reduction.
23362353
const RecurKind Kind;
23372354

2338-
/// The phi is part of an in-loop reduction.
2339-
bool IsInLoop;
2340-
2341-
/// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2342-
bool IsOrdered;
2343-
2344-
/// When expanding the reduction PHI, the plan's VF element count is divided
2345-
/// by this factor to form the reduction phi's VF.
2346-
unsigned VFScaleFactor = 1;
2355+
RdxStyle Style;
23472356

23482357
public:
23492358
/// Create a new VPReductionPHIRecipe for the reduction \p Phi.
23502359
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
2351-
bool IsInLoop = false, bool IsOrdered = false,
2352-
unsigned VFScaleFactor = 1)
2360+
RdxStyle Style)
23532361
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2354-
IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
2355-
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2356-
}
2362+
Style(Style) {}
23572363

23582364
~VPReductionPHIRecipe() override = default;
23592365

23602366
VPReductionPHIRecipe *clone() override {
23612367
auto *R = new VPReductionPHIRecipe(
23622368
dyn_cast_or_null<PHINode>(getUnderlyingValue()), getRecurrenceKind(),
2363-
*getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
2369+
*getOperand(0), Style);
23642370
R->addOperand(getBackedgeValue());
23652371
return R;
23662372
}
@@ -2370,8 +2376,12 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
23702376
/// Generate the phi/select nodes.
23712377
void execute(VPTransformState &State) override;
23722378

2373-
/// Get the factor that the VF of this recipe's output should be scaled by.
2374-
unsigned getVFScaleFactor() const { return VFScaleFactor; }
2379+
/// Get the factor that the VF of this recipe's output should be scaled by, or
2380+
/// std::nullopt if it isn't scaled (i.e. the style is not RdxPartial).
2381+
std::optional<unsigned> getVFScaleFactor() const {
2382+
auto *Partial = std::get_if<RdxPartial>(&Style);
2383+
return Partial ? std::make_optional(Partial->VFScaleFactor) : std::nullopt;
2384+
}
23752385

23762386
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
23772387
/// Print the recipe.
@@ -2388,10 +2398,18 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
23882398
RecurKind getRecurrenceKind() const { return Kind; }
23892399

23902400
/// Returns true, if the phi is part of an ordered reduction.
2391-
bool isOrdered() const { return IsOrdered; }
2401+
bool isOrdered() const {
2402+
return std::holds_alternative<RdxOrderedInLoop>(Style);
2403+
}
2404+
2405+
/// Returns true if the phi is part of an in-loop reduction.
2406+
bool isInLoop() const {
2407+
return std::holds_alternative<RdxInLoop>(Style) ||
2408+
std::holds_alternative<RdxOrderedInLoop>(Style);
2409+
}
23922410

2393-
/// Returns true, if the phi is part of an in-loop reduction.
2394-
bool isInLoop() const { return IsInLoop; }
2411+
/// Returns true if the reduction outputs a vector with a scaled down VF.
2412+
bool isPartialReduction() const {
  // An absent scale factor is treated as 1, i.e. not partial.
  return getVFScaleFactor().value_or(1) > 1;
}
23952413

23962414
/// Returns true if the recipe only uses the first lane of operand \p Op.
23972415
bool onlyFirstLaneUsed(const VPValue *Op) const override {
@@ -2663,23 +2681,25 @@ class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
26632681
}
26642682
};
26652683

2666-
/// A recipe to represent inloop reduction operations, performing a reduction on
2667-
/// a vector operand into a scalar value, and adding the result to a chain.
2668-
/// The Operands are {ChainOp, VecOp, [Condition]}.
2684+
/// A recipe to represent inloop, ordered or partial reduction operations. It
2685+
/// performs a reduction on a vector operand into a scalar (vector in the case
2686+
/// of a partial reduction) value, and adds the result to a chain. The Operands
2687+
/// are {ChainOp, VecOp, [Condition]}.
26692688
class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
2689+
26702690
/// The recurrence kind for the reduction in question.
26712691
RecurKind RdxKind;
2672-
bool IsOrdered;
26732692
/// Whether the reduction is conditional.
26742693
bool IsConditional = false;
2694+
RdxStyle Style;
26752695

26762696
protected:
26772697
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
26782698
FastMathFlags FMFs, Instruction *I,
26792699
ArrayRef<VPValue *> Operands, VPValue *CondOp,
2680-
bool IsOrdered, DebugLoc DL)
2700+
RdxStyle Style, DebugLoc DL)
26812701
: VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2682-
IsOrdered(IsOrdered) {
2702+
Style(Style) {
26832703
if (CondOp) {
26842704
IsConditional = true;
26852705
addOperand(CondOp);
@@ -2690,30 +2710,29 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
26902710
public:
26912711
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
26922712
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2693-
bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2713+
RdxStyle Style, DebugLoc DL = DebugLoc::getUnknown())
26942714
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2695-
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2696-
IsOrdered, DL) {}
2715+
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2716+
DL) {}
26972717

26982718
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
26992719
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2700-
bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2720+
RdxStyle Style, DebugLoc DL = DebugLoc::getUnknown())
27012721
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2702-
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2703-
IsOrdered, DL) {}
2722+
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2723+
DL) {}
27042724

27052725
~VPReductionRecipe() override = default;
27062726

27072727
VPReductionRecipe *clone() override {
27082728
return new VPReductionRecipe(RdxKind, getFastMathFlags(),
27092729
getUnderlyingInstr(), getChainOp(), getVecOp(),
2710-
getCondOp(), IsOrdered, getDebugLoc());
2730+
getCondOp(), Style, getDebugLoc());
27112731
}
27122732

27132733
static inline bool classof(const VPRecipeBase *R) {
27142734
return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2715-
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
2716-
R->getVPDefID() == VPRecipeBase::VPPartialReductionSC;
2735+
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
27172736
}
27182737

27192738
static inline bool classof(const VPUser *U) {
@@ -2746,9 +2765,13 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
27462765
/// Return the recurrence kind for the in-loop reduction.
27472766
RecurKind getRecurrenceKind() const { return RdxKind; }
27482767
/// Return true if the in-loop reduction is ordered.
2749-
bool isOrdered() const { return IsOrdered; };
2768+
bool isOrdered() const {
2769+
return std::holds_alternative<RdxOrderedInLoop>(Style);
2770+
};
27502771
/// Return true if the in-loop reduction is conditional.
27512772
bool isConditional() const { return IsConditional; };
2773+
/// Returns true if the reduction outputs a vector with a scaled down VF.
2774+
bool isPartialReduction() const {
  // An absent scale factor is treated as 1, i.e. not partial.
  return getVFScaleFactor().value_or(1) > 1;
}
27522775
/// The VPValue of the scalar Chain being accumulated.
27532776
VPValue *getChainOp() const { return getOperand(0); }
27542777
/// The VPValue of the vector value to be reduced.
@@ -2757,68 +2780,12 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
27572780
VPValue *getCondOp() const {
27582781
return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
27592782
}
2760-
};
2761-
2762-
/// A recipe for forming partial reductions. In the loop, an accumulator and
2763-
/// vector operand are added together and passed to the next iteration as the
2764-
/// next accumulator. After the loop body, the accumulator is reduced to a
2765-
/// scalar value.
2766-
class VPPartialReductionRecipe : public VPReductionRecipe {
2767-
unsigned Opcode;
2768-
2769-
/// The divisor by which the VF of this recipe's output should be divided
2770-
/// during execution.
2771-
unsigned VFScaleFactor;
2772-
2773-
public:
2774-
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
2775-
VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
2776-
: VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
2777-
VFScaleFactor, ReductionInst) {}
2778-
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2779-
VPValue *Cond, unsigned ScaleFactor,
2780-
Instruction *ReductionInst = nullptr)
2781-
: VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2782-
FastMathFlags(), ReductionInst,
2783-
ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2784-
Opcode(Opcode), VFScaleFactor(ScaleFactor) {
2785-
[[maybe_unused]] auto *AccumulatorRecipe =
2786-
getChainOp()->getDefiningRecipe();
2787-
// When cloning as part of a VPExpressionRecipe the chain op could have
2788-
// been replaced by a temporary VPValue, so it doesn't have a defining recipe.
2789-
assert((!AccumulatorRecipe ||
2790-
isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2791-
isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2792-
"Unexpected operand order for partial reduction recipe");
2793-
}
2794-
~VPPartialReductionRecipe() override = default;
2795-
2796-
VPPartialReductionRecipe *clone() override {
2797-
return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
2798-
getCondOp(), VFScaleFactor,
2799-
getUnderlyingInstr());
2800-
}
2801-
2802-
VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2803-
2804-
/// Generate the reduction in the loop.
2805-
void execute(VPTransformState &State) override;
2806-
2807-
/// Return the cost of this VPPartialReductionRecipe.
2808-
InstructionCost computeCost(ElementCount VF,
2809-
VPCostContext &Ctx) const override;
2810-
2811-
/// Get the binary op's opcode.
2812-
unsigned getOpcode() const { return Opcode; }
2813-
2814-
/// Get the factor that the VF of this recipe's output should be scaled by.
2815-
unsigned getVFScaleFactor() const { return VFScaleFactor; }
2816-
2817-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2818-
/// Print the recipe.
2819-
void print(raw_ostream &O, const Twine &Indent,
2820-
VPSlotTracker &SlotTracker) const override;
2821-
#endif
2783+
/// Get the factor that the VF of this recipe's output should be scaled by, or
2784+
/// std::nullopt if it isn't scaled (i.e. the style is not RdxPartial).
2785+
std::optional<unsigned> getVFScaleFactor() const {
2786+
auto *Partial = std::get_if<RdxPartial>(&Style);
2787+
return Partial ? std::make_optional(Partial->VFScaleFactor) : std::nullopt;
2788+
}
28222789
};
28232790

28242791
/// A recipe to represent inloop reduction operations with vector-predication
@@ -2834,7 +2801,9 @@ class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe {
28342801
R.getFastMathFlags(),
28352802
cast_or_null<Instruction>(R.getUnderlyingValue()),
28362803
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2837-
R.isOrdered(), DL) {}
2804+
R.isOrdered() ? RdxStyle(RdxOrderedInLoop{})
2805+
: RdxStyle(RdxInLoop{}),
2806+
DL) {}
28382807

28392808
~VPReductionEVLRecipe() override = default;
28402809

@@ -3098,8 +3067,8 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
30983067
void decompose();
30993068

31003069
unsigned getVFScaleFactor() const {
3101-
auto *PR = dyn_cast<VPPartialReductionRecipe>(ExpressionRecipes.back());
3102-
return PR ? PR->getVFScaleFactor() : 1;
3070+
auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3071+
return PR ? PR->getVFScaleFactor().value_or(1) : 1;
31033072
}
31043073

31053074
/// Method for generating code, must not be called as this recipe is abstract.

0 commit comments

Comments
 (0)