@@ -668,6 +668,7 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                          unsigned BestUF,
                                          PredicatedScalarEvolution &PSE) {
+
   assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
   assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
   VPBasicBlock *ExitingVPBB =
@@ -710,6 +711,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   // TODO: Further simplifications are possible
   // 1. Replace inductions with constants.
   // 2. Replace vector loop region with VPBasicBlock.
+  //
 }
 
 /// Sink users of \p FOR after the recipe defining the previous value \p
@@ -1657,3 +1659,129 @@ void VPlanTransforms::createInterleaveGroups(
     }
   }
 }
+
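+/// Returns true if \p V is a load that can feed lane \p Idx of the narrowed
+/// operation rooted at \p R0: either an unmasked wide load used directly as
+/// an operand of \p R0, or member \p Idx of an interleave group whose factor
+/// equals its number of members.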
+static bool supportedLoad(VPWidenRecipe *R0, VPValue *V, unsigned Idx) {
+  if (auto *W = dyn_cast_or_null<VPWidenLoadRecipe>(V->getDefiningRecipe())) {
+    if (W->getMask())
+      return false;
+    return R0->getOperand(0) == V || R0->getOperand(1) == V;
+  }
+
+  if (auto *IR = dyn_cast_or_null<VPInterleaveRecipe>(V->getDefiningRecipe())) {
+    return IR->getInterleaveGroup()->getFactor() ==
+               IR->getInterleaveGroup()->getNumMembers() &&
+           IR->getVPValue(Idx) == V;
+  }
+  return false;
+}
+
+/// Returns true if \p IR is a consecutive interleave group with \p VF members.
+static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *IR,
+                                         ElementCount VF) {
+  if (!IR)
+    return false;
+  auto *IG = IR->getInterleaveGroup();
+  return IG->getFactor() == IG->getNumMembers() &&
+         IG->getNumMembers() == VF.getKnownMinValue();
+}
+
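+/// Try to narrow interleave groups with VF members into single wide loads
+/// and stores, so the resulting plan processes one original scalar iteration
+/// per vector iteration. Illustrative example of a qualifying source loop
+/// (hypothetical; assumes the inner loop is fully unrolled with VF known):
+///   for (i = 0; i < N; i++)
+///     for (j = 0; j < VF; j++)
+///       dst[VF * i + j] = src[VF * i + j] * factor[i];
+/// The accesses to src and dst form consecutive interleave groups with VF
+/// members, and factor[i] is a single wide load feeding every lane's multiply.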
+bool VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF) {
+  using namespace llvm::VPlanPatternMatch;
+  if (VF.isScalable())
+    return false;
+
+  bool Changed = false;
+  SmallVector<VPInterleaveRecipe *> StoreGroups;
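+  // Collect store interleave groups whose stored values are all computed by
+  // widen recipes with matching opcodes, fed only by supported loads; bail
+  // out if the loop body contains anything else the narrowing cannot handle.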
+  for (auto &R : make_early_inc_range(
+           *Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
+    if (match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
+        isa<VPCanonicalIVPHIRecipe>(&R))
+      continue;
+
+    // Bail out on recipes not supported at the moment:
+    //  * phi recipes other than the canonical induction (only plans with a
+    //    canonical induction phi are supported)
+    //  * recipes writing to memory except interleave groups
+    if ((R.isPhi() && !isa<VPCanonicalIVPHIRecipe>(&R)) ||
+        (R.mayWriteToMemory() && !isa<VPInterleaveRecipe>(&R)))
+      return false;
+
+    auto *IR = dyn_cast<VPInterleaveRecipe>(&R);
+    if (!IR)
+      continue;
+
+    if (!isConsecutiveInterleaveGroup(IR, VF))
+      return false;
+    if (IR->getStoredValues().empty())
+      continue;
+
+    auto *Lane0 = dyn_cast_or_null<VPWidenRecipe>(
+        IR->getStoredValues()[0]->getDefiningRecipe());
+    if (!Lane0)
+      return false;
+    for (const auto &[I, V] : enumerate(IR->getStoredValues())) {
+      auto *R = dyn_cast<VPWidenRecipe>(V->getDefiningRecipe());
+      if (!R || R->getOpcode() != Lane0->getOpcode())
+        return false;
+      // Work around captured structured bindings being a C++20 extension.
+      auto Idx = I;
+      if (any_of(R->operands(), [Lane0, Idx](VPValue *V) {
+            return !supportedLoad(Lane0, V, Idx);
+          }))
+        return false;
+    }
+
+    StoreGroups.push_back(IR);
+  }
+
+  // Narrow the operation tree rooted at each store group.
+  for (auto *StoreGroup : StoreGroups) {
+    auto *Lane0 = cast<VPWidenRecipe>(
+        StoreGroup->getStoredValues()[0]->getDefiningRecipe());
+
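+    // Determine which operand of the lane-0 recipe is defined by the load
+    // interleave group and which by the wide load.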
+    unsigned LoadGroupIdx =
+        isa<VPInterleaveRecipe>(Lane0->getOperand(1)->getDefiningRecipe()) ? 1
+                                                                           : 0;
+    unsigned WideLoadIdx = 1 - LoadGroupIdx;
+    auto *LoadGroup = cast<VPInterleaveRecipe>(
+        Lane0->getOperand(LoadGroupIdx)->getDefiningRecipe());
+
+    auto *WideLoad = cast<VPWidenLoadRecipe>(
+        Lane0->getOperand(WideLoadIdx)->getDefiningRecipe());
+
+    // Narrow the wide load to a uniform scalar load, as the transformed
+    // VPlan will only process one original iteration.
+    auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(),
+                                    WideLoad->operands(), /*IsUniform=*/true);
+    // Narrow the interleave group to a wide load, as the transformed VPlan
+    // will only process one original iteration.
+    auto *L = new VPWidenLoadRecipe(
+        *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
+        LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
+        /*Reverse=*/false, LoadGroup->getDebugLoc());
+    L->insertBefore(LoadGroup);
+    N->insertBefore(LoadGroup);
+    Lane0->setOperand(LoadGroupIdx, L);
+    Lane0->setOperand(WideLoadIdx, N);
+
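+    // Likewise narrow the store interleave group to a single wide store of
+    // the narrowed lane-0 value.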
+    auto *S = new VPWidenStoreRecipe(
+        *cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),
+        StoreGroup->getAddr(), Lane0, /*Mask=*/nullptr, /*Consecutive=*/true,
+        /*Reverse=*/false, StoreGroup->getDebugLoc());
+    S->insertBefore(StoreGroup);
+    StoreGroup->eraseFromParent();
+    Changed = true;
+  }
+
+  if (!Changed)
+    return false;
+
+  // Adjust induction to reflect that the transformed plan only processes one
+  // original iteration.
+  auto *CanIV = Plan.getCanonicalIV();
+  VPInstruction *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
+  Inc->setOperand(
+      1, Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+  removeDeadRecipes(Plan);
+  return true;
+}