@@ -672,6 +672,7 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
 void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                          unsigned BestUF,
                                          PredicatedScalarEvolution &PSE) {
+
   assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
   assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
   VPBasicBlock *ExitingVPBB =
@@ -713,6 +714,7 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
   // TODO: Further simplifications are possible
   // 1. Replace inductions with constants.
   // 2. Replace vector loop region with VPBasicBlock.
+  //
 }

 /// Sink users of \p FOR after the recipe defining the previous value \p
@@ -1589,3 +1591,129 @@ void VPlanTransforms::createInterleaveGroups(
     }
   }
 }
+
+/// Returns true if \p V, used as an operand of \p R0, is an unmasked wide load
+/// feeding \p R0 directly, or member \p Idx of a full interleave group.
+static bool supportedLoad(VPWidenRecipe *R0, VPValue *V, unsigned Idx) {
+  if (auto *W = dyn_cast_or_null<VPWidenLoadRecipe>(V->getDefiningRecipe())) {
+    if (W->getMask())
+      return false;
+    return R0->getOperand(0) == V || R0->getOperand(1) == V;
+  }
+
+  if (auto *IR = dyn_cast_or_null<VPInterleaveRecipe>(V->getDefiningRecipe())) {
+    return IR->getInterleaveGroup()->getFactor() ==
+               IR->getInterleaveGroup()->getNumMembers() &&
+           IR->getVPValue(Idx) == V;
+  }
+  return false;
+}
+
+/// Returns true if \p IR is a consecutive interleave group with \p VF members.
+static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *IR,
+                                         ElementCount VF) {
+  if (!IR)
+    return false;
+  auto *IG = IR->getInterleaveGroup();
+  return IG->getFactor() == IG->getNumMembers() &&
+         IG->getNumMembers() == VF.getKnownMinValue();
+}
+
+/// Try to narrow interleave groups of consecutive loads and stores whose
+/// factor matches \p VF into single wide loads and stores, so that the
+/// transformed VPlan processes one original iteration per vector iteration.
+bool VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF) {
+  using namespace llvm::VPlanPatternMatch;
+  if (VF.isScalable())
+    return false;
+
+  bool Changed = false;
+  SmallVector<VPInterleaveRecipe *> StoreGroups;
+  for (auto &R : make_early_inc_range(
+           *Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
+    if (match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
+        isa<VPCanonicalIVPHIRecipe>(&R))
+      continue;
+
+    // Bail out on recipes not supported at the moment:
+    // * phi recipes other than the canonical induction
+    // * recipes writing to memory except interleave groups
+    // Only support plans with a canonical induction phi.
+    if ((R.isPhi() && !isa<VPCanonicalIVPHIRecipe>(&R)) ||
+        (R.mayWriteToMemory() && !isa<VPInterleaveRecipe>(&R)))
+      return false;
+
+    auto *IR = dyn_cast<VPInterleaveRecipe>(&R);
+    if (!IR)
+      continue;
+
+    if (!isConsecutiveInterleaveGroup(IR, VF))
+      return false;
+    if (IR->getStoredValues().empty())
+      continue;
+
+    auto *Lane0 = dyn_cast_or_null<VPWidenRecipe>(
+        IR->getStoredValues()[0]->getDefiningRecipe());
+    if (!Lane0)
+      return false;
+    for (const auto &[I, V] : enumerate(IR->getStoredValues())) {
+      auto *R = dyn_cast<VPWidenRecipe>(V->getDefiningRecipe());
+      if (!R || R->getOpcode() != Lane0->getOpcode())
+        return false;
+      // Work around captured structured bindings being a C++20 extension.
+      auto Idx = I;
+      if (any_of(R->operands(), [Lane0, Idx](VPValue *V) {
+            return !supportedLoad(Lane0, V, Idx);
+          }))
+        return false;
+    }
+
+    StoreGroups.push_back(IR);
+  }
+
+  // Narrow operation trees rooted at store groups.
+  for (auto *StoreGroup : StoreGroups) {
+    auto *Lane0 = cast<VPWidenRecipe>(
+        StoreGroup->getStoredValues()[0]->getDefiningRecipe());
+
+    unsigned LoadGroupIdx =
+        isa<VPInterleaveRecipe>(Lane0->getOperand(1)->getDefiningRecipe()) ? 1
+                                                                           : 0;
+    unsigned WideLoadIdx = 1 - LoadGroupIdx;
+    auto *LoadGroup = cast<VPInterleaveRecipe>(
+        Lane0->getOperand(LoadGroupIdx)->getDefiningRecipe());
+
+    auto *WideLoad = cast<VPWidenLoadRecipe>(
+        Lane0->getOperand(WideLoadIdx)->getDefiningRecipe());
+
+    // Narrow wide load to uniform scalar load, as transformed VPlan will only
+    // process one original iteration.
+    auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(),
+                                    WideLoad->operands(), /*IsUniform=*/true);
+    // Narrow interleave group to wide load, as transformed VPlan will only
+    // process one original iteration.
+    auto *L = new VPWidenLoadRecipe(
+        *cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()),
+        LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
+        /*Reverse=*/false, LoadGroup->getDebugLoc());
+    L->insertBefore(LoadGroup);
+    N->insertBefore(LoadGroup);
+    Lane0->setOperand(LoadGroupIdx, L);
+    Lane0->setOperand(WideLoadIdx, N);
+
+    auto *S = new VPWidenStoreRecipe(
+        *cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()),
+        StoreGroup->getAddr(), Lane0, /*Mask=*/nullptr, /*Consecutive=*/true,
+        /*Reverse=*/false, StoreGroup->getDebugLoc());
+    S->insertBefore(StoreGroup);
+    StoreGroup->eraseFromParent();
+    Changed = true;
+  }
+
+  if (!Changed)
+    return false;
+
+  // Adjust induction to reflect that the transformed plan only processes one
+  // original iteration.
+  auto *CanIV = Plan.getCanonicalIV();
+  VPInstruction *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
+  Inc->setOperand(
+      1, Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+  removeDeadRecipes(Plan);
+  return true;
+}
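
For illustration (not part of the patch), a minimal source-level sketch of the pattern this transform targets, assuming the loop is vectorized with VF = 2; the names Pair, A, B, and scale are hypothetical:

// Hypothetical example. When vectorized with VF = 2, A[I].X and A[I].Y form
// load and store interleave groups with factor 2 == number of members == VF,
// and B[I] is an unmasked wide load feeding both multiplies; this is the
// shape supportedLoad and isConsecutiveInterleaveGroup check for.
struct Pair {
  double X, Y;
};

void scale(Pair *A, const double *B, long N) {
  for (long I = 0; I < N; ++I) {
    A[I].X *= B[I];
    A[I].Y *= B[I];
  }
}

After narrowing, each interleave group becomes a single wide load or store of A[I], B[I] becomes a uniform scalar load, and the canonical IV steps by 1 instead of VF, so each vector iteration covers exactly one original iteration.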