@@ -552,7 +552,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
552552 case VPRecipeBase::VPWidenIntOrFpInductionSC:
553553 case VPRecipeBase::VPWidenPointerInductionSC:
554554 case VPRecipeBase::VPReductionPHISC:
555- case VPRecipeBase::VPPartialReductionSC:
556555 return true ;
557556 case VPRecipeBase::VPBranchOnMaskSC:
558557 case VPRecipeBase::VPInterleaveSC:
@@ -2194,34 +2193,37 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
21942193 // / Descriptor for the reduction.
21952194 const RecurrenceDescriptor &RdxDesc;
21962195
2197- // / The phi is part of an in-loop reduction.
2198- bool IsInLoop;
2199-
22002196 // / The phi is part of an ordered reduction. Requires the reduction to be in-loop.
22012197 bool IsOrdered;
22022198
2203- // / When expanding the reduction PHI, the plan's VF element count is divided
2204- // / by this factor to form the reduction phi's VF.
2205- unsigned VFScaleFactor = 1 ;
2199+ // / The factor by which the VF is divided to form the VF of this recipe's
2200+ // / output.
2201+ // / For reductions that are neither in-loop nor partial this is equal to 1.
2202+ // / For in-loop reductions this is equal to 0, a sentinel meaning the factor
2203+ // / equals the VF itself (which may not be known yet). For partial reductions
2204+ // / this is a value greater than 1.
2205+ unsigned VFScaleFactor;
22062206
22072207public:
22082208 // / Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
22092209 // / RdxDesc.
22102210 VPReductionPHIRecipe (PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
2211- VPValue &Start, bool IsInLoop = false ,
2212- bool IsOrdered = false , unsigned VFScaleFactor = 1 )
2211+ VPValue &Start, bool IsOrdered = false ,
2212+ unsigned VFScaleFactor = 1 )
22132213 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
2214- RdxDesc (RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
2215- VFScaleFactor(VFScaleFactor) {
2216- assert ((!IsOrdered || IsInLoop) && " IsOrdered requires IsInLoop" );
2214+ RdxDesc (RdxDesc), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
2215+ assert ((!IsOrdered || isInLoop ()) &&
2216+ " IsOrdered requires the reduction to be in-loop" );
2217+ assert (((!isInLoop () && !IsOrdered) || isInLoop ()) &&
2218+ " Invalid VFScaleFactor" );
22172219 }
22182220
22192221 ~VPReductionPHIRecipe () override = default ;
22202222
22212223 VPReductionPHIRecipe *clone () override {
22222224 auto *R = new VPReductionPHIRecipe (
22232225 dyn_cast_or_null<PHINode>(getUnderlyingValue ()), RdxDesc,
2224- *getOperand (0 ), IsInLoop, IsOrdered, VFScaleFactor);
2226+ *getOperand (0 ), IsOrdered, VFScaleFactor);
22252227 R->addOperand (getBackedgeValue ());
22262228 return R;
22272229 }
@@ -2247,8 +2249,11 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
22472249 // / Returns true if the phi is part of an ordered reduction.
22482250 bool isOrdered () const { return IsOrdered; }
22492251
2250- // / Returns true, if the phi is part of an in-loop reduction.
2251- bool isInLoop () const { return IsInLoop; }
2252+ // / Returns true if the phi is part of an in-loop reduction.
2253+ bool isInLoop () const { return VFScaleFactor == 0 ; }
2254+
2255+ // / Returns true if the reduction outputs a vector with a scaled down VF.
2256+ bool isPartialReduction () const { return VFScaleFactor > 1 ; }
22522257
22532258 // / Returns true if the recipe only uses the first lane of operand \p Op.
22542259 bool onlyFirstLaneUsed (const VPValue *Op) const override {
@@ -2421,23 +2426,32 @@ class VPInterleaveRecipe : public VPRecipeBase {
24212426 Instruction *getInsertPos () const { return IG->getInsertPos (); }
24222427};
24232428
2424- // / A recipe to represent inloop reduction operations, performing a reduction on
2425- // / a vector operand into a scalar value, and adding the result to a chain.
2426- // / The Operands are {ChainOp, VecOp, [Condition]}.
2429+ // / A recipe to represent inloop, ordered or partial reduction operations. It
2430+ // / performs a reduction on a vector operand into a scalar (vector in the case
2431+ // / of a partial reduction) value, and adds the result to a chain. The Operands
2432+ // / are {ChainOp, VecOp, [Condition]}.
24272433class VPReductionRecipe : public VPRecipeWithIRFlags {
24282434 // / The recurrence kind for the reduction in question.
24292435 RecurKind RdxKind;
24302436 bool IsOrdered;
24312437 // / Whether the reduction is conditional.
24322438 bool IsConditional = false ;
2439+ // / The factor by which the VF is divided to form the VF of this recipe's
2440+ // / output.
2441+ // / For reductions that are neither in-loop nor partial this is equal to 1.
2442+ // / For in-loop reductions this is equal to 0, a sentinel meaning the factor
2443+ // / equals the VF itself (which may not be known yet).
2444+ // / For partial reductions this is a value greater than 1.
2445+ unsigned VFScaleFactor;
24332446
24342447protected:
24352448 VPReductionRecipe (const unsigned char SC, RecurKind RdxKind,
24362449 FastMathFlags FMFs, Instruction *I,
24372450 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2438- bool IsOrdered, DebugLoc DL)
2451+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL)
24392452 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2440- IsOrdered (IsOrdered) {
2453+ IsOrdered (IsOrdered), VFScaleFactor(VFScaleFactor) {
2454+ assert ((!IsOrdered || VFScaleFactor == 0 ) && " Invalid scale factor" );
24412455 if (CondOp) {
24422456 IsConditional = true ;
24432457 addOperand (CondOp);
@@ -2448,30 +2462,29 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24482462public:
24492463 VPReductionRecipe (RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
24502464 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2451- bool IsOrdered, DebugLoc DL = {})
2465+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL = {})
24522466 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
24532467 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2454- IsOrdered, DL) {}
2468+ IsOrdered, VFScaleFactor, DL) {}
24552469
24562470 VPReductionRecipe (const RecurKind RdxKind, FastMathFlags FMFs,
24572471 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2458- bool IsOrdered, DebugLoc DL = {})
2472+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL = {})
24592473 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr ,
24602474 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2461- IsOrdered, DL) {}
2475+ IsOrdered, VFScaleFactor, DL) {}
24622476
24632477 ~VPReductionRecipe () override = default ;
24642478
24652479 VPReductionRecipe *clone () override {
2466- return new VPReductionRecipe (RdxKind, getFastMathFlags (),
2467- getUnderlyingInstr (), getChainOp (), getVecOp (),
2468- getCondOp (), IsOrdered, getDebugLoc ());
2480+ return new VPReductionRecipe (
2481+ RdxKind, getFastMathFlags (), getUnderlyingInstr (), getChainOp (),
2482+ getVecOp (), getCondOp (), IsOrdered, VFScaleFactor , getDebugLoc ());
24692483 }
24702484
24712485 static inline bool classof (const VPRecipeBase *R) {
24722486 return R->getVPDefID () == VPRecipeBase::VPReductionSC ||
2473- R->getVPDefID () == VPRecipeBase::VPReductionEVLSC ||
2474- R->getVPDefID () == VPRecipeBase::VPPartialReductionSC;
2487+ R->getVPDefID () == VPRecipeBase::VPReductionEVLSC;
24752488 }
24762489
24772490 static inline bool classof (const VPUser *U) {
@@ -2498,6 +2511,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24982511 bool isOrdered () const { return IsOrdered; };
24992512 // / Return true if the in-loop reduction is conditional.
25002513 bool isConditional () const { return IsConditional; };
2514+ // / Returns true if the reduction outputs a vector with a scaled down VF.
2515+ bool isPartialReduction () const { return VFScaleFactor > 1 ; }
25012516 // / The VPValue of the scalar Chain being accumulated.
25022517 VPValue *getChainOp () const { return getOperand (0 ); }
25032518 // / The VPValue of the vector value to be reduced.
@@ -2506,68 +2521,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
25062521 VPValue *getCondOp () const {
25072522 return isConditional () ? getOperand (getNumOperands () - 1 ) : nullptr ;
25082523 }
2509- };
2510-
2511- // / A recipe for forming partial reductions. In the loop, an accumulator and
2512- // / vector operand are added together and passed to the next iteration as the
2513- // / next accumulator. After the loop body, the accumulator is reduced to a
2514- // / scalar value.
2515- class VPPartialReductionRecipe : public VPReductionRecipe {
2516- unsigned Opcode;
2517-
2518- // / The divisor by which the VF of this recipe's output should be divided
2519- // / during execution.
2520- unsigned VFScaleFactor;
2521-
2522- public:
2523- VPPartialReductionRecipe (Instruction *ReductionInst, VPValue *Op0,
2524- VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
2525- : VPPartialReductionRecipe(ReductionInst->getOpcode (), Op0, Op1, Cond,
2526- VFScaleFactor, ReductionInst) {}
2527- VPPartialReductionRecipe (unsigned Opcode, VPValue *Op0, VPValue *Op1,
2528- VPValue *Cond, unsigned ScaleFactor,
2529- Instruction *ReductionInst = nullptr )
2530- : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2531- FastMathFlags (), ReductionInst,
2532- ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2533- Opcode(Opcode), VFScaleFactor(ScaleFactor) {
2534- [[maybe_unused]] auto *AccumulatorRecipe =
2535- getChainOp ()->getDefiningRecipe ();
2536- // When cloning as part of a VPExpressionRecipe, the chain op could have
2537- // been removed from the plan and so doesn't have a defining recipe.
2538- assert ((!AccumulatorRecipe ||
2539- isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2540- isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2541- " Unexpected operand order for partial reduction recipe" );
2542- }
2543- ~VPPartialReductionRecipe () override = default ;
2544-
2545- VPPartialReductionRecipe *clone () override {
2546- return new VPPartialReductionRecipe (Opcode, getOperand (0 ), getOperand (1 ),
2547- getCondOp (), VFScaleFactor,
2548- getUnderlyingInstr ());
2549- }
2550-
2551- VP_CLASSOF_IMPL (VPDef::VPPartialReductionSC)
2552-
2553- // / Generate the reduction in the loop.
2554- void execute(VPTransformState &State) override ;
2555-
2556- // / Return the cost of this VPPartialReductionRecipe.
2557- InstructionCost computeCost (ElementCount VF,
2558- VPCostContext &Ctx) const override ;
2559-
2560- // / Get the binary op's opcode.
2561- unsigned getOpcode () const { return Opcode; }
2562-
25632524 // / Get the factor that the VF of this recipe's output should be scaled by.
25642525 unsigned getVFScaleFactor () const { return VFScaleFactor; }
2565-
2566- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2567- // / Print the recipe.
2568- void print (raw_ostream &O, const Twine &Indent,
2569- VPSlotTracker &SlotTracker) const override ;
2570- #endif
25712526};
25722527
25732528// / A recipe to represent inloop reduction operations with vector-predication
@@ -2583,7 +2538,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
25832538 R.getFastMathFlags(),
25842539 cast_or_null<Instruction>(R.getUnderlyingValue()),
25852540 ArrayRef<VPValue *>({R.getChainOp (), R.getVecOp (), &EVL}), CondOp,
2586- R.isOrdered(), DL) {}
2541+ R.isOrdered(), 0 , DL) {}
25872542
25882543 ~VPReductionEVLRecipe () override = default ;
25892544
0 commit comments