27
27
#include " RISCV.h"
28
28
#include " RISCVSubtarget.h"
29
29
#include " llvm/ADT/Statistic.h"
30
+ #include " llvm/CodeGen/LiveDebugVariables.h"
30
31
#include " llvm/CodeGen/LiveIntervals.h"
32
+ #include " llvm/CodeGen/LiveStacks.h"
31
33
#include " llvm/CodeGen/MachineFunctionPass.h"
32
34
#include < queue>
33
35
using namespace llvm ;
34
36
35
37
// DEBUG_TYPE is also the argument string handed to INITIALIZE_PASS when
// RISCVInsertVSETVLI is registered further down in this file.
#define DEBUG_TYPE " riscv-insert-vsetvli"
36
38
// Display names of the two passes; each one is passed to the matching
// INITIALIZE_PASS, and RISCV_COALESCE_VSETVLI_NAME is what
// RISCVCoalesceVSETVLI::getPassName() returns.
#define RISCV_INSERT_VSETVLI_NAME " RISC-V Insert VSETVLI pass"
39
+ #define RISCV_COALESCE_VSETVLI_NAME " RISC-V Coalesce VSETVLI pass"
37
40
38
41
// Counters. NumCoalescedVSETVL takes over from NumRemovedVSETVL: it is
// increased by the number of instructions queued for deletion in
// RISCVCoalesceVSETVLI::coalesceVSETVLIs.
STATISTIC (NumInsertedVSETVL, " Number of VSETVL inst inserted" );
39
- STATISTIC (NumRemovedVSETVL , " Number of VSETVL inst removed " );
42
+ STATISTIC (NumCoalescedVSETVL , " Number of VSETVL inst coalesced " );
40
43
41
44
static cl::opt<bool > DisableInsertVSETVLPHIOpt (
42
45
" riscv-disable-insert-vsetvl-phi-opt" , cl::init(false ), cl::Hidden,
@@ -190,6 +193,11 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
190
193
if (UseMO.getReg () == RISCV::NoRegister)
191
194
return true ;
192
195
196
+ if (UseMO.isUndef ())
197
+ return true ;
198
+ if (UseMO.getReg ().isPhysical ())
199
+ return false ;
200
+
193
201
if (MachineInstr *UseMI = MRI.getVRegDef (UseMO.getReg ())) {
194
202
if (UseMI->isImplicitDef ())
195
203
return true ;
@@ -778,18 +786,52 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
778
786
VSETVLIInfo &Info) const ;
779
787
void computeIncomingVLVTYPE (const MachineBasicBlock &MBB);
780
788
void emitVSETVLIs (MachineBasicBlock &MBB);
781
- void doLocalPostpass (MachineBasicBlock &MBB);
782
789
void doPRE (MachineBasicBlock &MBB);
783
790
void insertReadVL (MachineBasicBlock &MBB);
784
791
};
785
792
793
+ class RISCVCoalesceVSETVLI : public MachineFunctionPass {
794
+ public:
795
+ static char ID;
796
+ const RISCVSubtarget *ST;
797
+ const TargetInstrInfo *TII;
798
+ MachineRegisterInfo *MRI;
799
+ LiveIntervals *LIS;
800
+
801
+ RISCVCoalesceVSETVLI () : MachineFunctionPass(ID) {}
802
+ bool runOnMachineFunction (MachineFunction &MF) override ;
803
+
804
+ void getAnalysisUsage (AnalysisUsage &AU) const override {
805
+ AU.setPreservesCFG ();
806
+
807
+ AU.addRequired <LiveIntervals>();
808
+ AU.addPreserved <LiveIntervals>();
809
+ AU.addRequired <SlotIndexes>();
810
+ AU.addPreserved <SlotIndexes>();
811
+ AU.addPreserved <LiveDebugVariables>();
812
+ AU.addPreserved <LiveStacks>();
813
+
814
+ MachineFunctionPass::getAnalysisUsage (AU);
815
+ }
816
+
817
+ StringRef getPassName () const override { return RISCV_COALESCE_VSETVLI_NAME; }
818
+
819
+ private:
820
+ bool coalesceVSETVLIs (MachineBasicBlock &MBB);
821
+ };
822
+
786
823
} // end anonymous namespace
787
824
788
825
// Out-of-class definition of the insert pass's static ID member, followed by
// its registration under the DEBUG_TYPE argument string.
char RISCVInsertVSETVLI::ID = 0 ;
789
826
790
827
INITIALIZE_PASS (RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
791
828
false , false )
792
829
830
// Same pair for the coalesce pass: the definition of the static ID declared in
// class RISCVCoalesceVSETVLI, then its registration with its own argument
// string and RISCV_COALESCE_VSETVLI_NAME as the display name.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS -- confirm.
+ char RISCVCoalesceVSETVLI::ID = 0;
831
+
832
+ INITIALIZE_PASS (RISCVCoalesceVSETVLI, " riscv-coalesce-vsetvli" ,
833
+ RISCV_COALESCE_VSETVLI_NAME, false , false )
834
+
793
835
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
794
836
// VSETIVLI instruction.
795
837
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
@@ -1511,12 +1553,12 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
1511
1553
1512
1554
auto &AVL = MI.getOperand (1 );
1513
1555
auto &PrevAVL = PrevMI.getOperand (1 );
1514
- assert (MRI.isSSA ());
1515
1556
1516
1557
// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
1517
1558
// For now just check that PrevMI uses the same virtual register.
1518
1559
if (AVL.isReg () && AVL.getReg () != RISCV::X0 &&
1519
- (!PrevAVL.isReg () || PrevAVL.getReg () != AVL.getReg ()))
1560
+ (!MRI.hasOneDef (AVL.getReg ()) || !PrevAVL.isReg () ||
1561
+ PrevAVL.getReg () != AVL.getReg ()))
1520
1562
return false ;
1521
1563
}
1522
1564
@@ -1526,7 +1568,7 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
1526
1568
return areCompatibleVTYPEs (PriorVType, VType, Used);
1527
1569
}
1528
1570
1529
- void RISCVInsertVSETVLI::doLocalPostpass (MachineBasicBlock &MBB) {
1571
+ bool RISCVCoalesceVSETVLI::coalesceVSETVLIs (MachineBasicBlock &MBB) {
1530
1572
MachineInstr *NextMI = nullptr ;
1531
1573
// We can have arbitrary code in successors, so VL and VTYPE
1532
1574
// must be considered demanded.
@@ -1558,20 +1600,49 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
1558
1600
1559
1601
if (canMutatePriorConfig (MI, *NextMI, Used, *MRI)) {
1560
1602
if (!isVLPreservingConfig (*NextMI)) {
1561
- MI.getOperand (0 ).setReg (NextMI->getOperand (0 ).getReg ());
1603
+ Register DefReg = NextMI->getOperand (0 ).getReg ();
1604
+
1605
+ MI.getOperand (0 ).setReg (DefReg);
1562
1606
MI.getOperand (0 ).setIsDead (false );
1607
+
1608
+ // The def of DefReg moved to MI, so extend the LiveInterval up to
1609
+ // it.
1610
+ if (DefReg.isVirtual ()) {
1611
+ LiveInterval &DefLI = LIS->getInterval (DefReg);
1612
+ SlotIndex MISlot = LIS->getInstructionIndex (MI).getRegSlot ();
1613
+ VNInfo *DefVNI = DefLI.getVNInfoAt (DefLI.beginIndex ());
1614
+ LiveInterval::Segment S (MISlot, DefLI.beginIndex (), DefVNI);
1615
+ DefLI.addSegment (S);
1616
+ DefVNI->def = MISlot;
1617
+ // Mark DefLI as spillable if it was previously unspillable
1618
+ DefLI.setWeight (0 );
1619
+
1620
+ // DefReg may have had no uses, in which case we need to shrink
1621
+ // the LiveInterval up to MI.
1622
+ LIS->shrinkToUses (&DefLI);
1623
+ }
1624
+
1563
1625
Register OldVLReg;
1564
1626
if (MI.getOperand (1 ).isReg ())
1565
1627
OldVLReg = MI.getOperand (1 ).getReg ();
1566
1628
if (NextMI->getOperand (1 ).isImm ())
1567
1629
MI.getOperand (1 ).ChangeToImmediate (NextMI->getOperand (1 ).getImm ());
1568
1630
else
1569
1631
MI.getOperand (1 ).ChangeToRegister (NextMI->getOperand (1 ).getReg (), false );
1632
+
1633
+ // Clear NextMI's AVL early so we're not counting it as a use.
1634
+ if (NextMI->getOperand (1 ).isReg ())
1635
+ NextMI->getOperand (1 ).setReg (RISCV::NoRegister);
1636
+
1570
1637
if (OldVLReg) {
1571
1638
MachineInstr *VLOpDef = MRI->getUniqueVRegDef (OldVLReg);
1572
1639
if (VLOpDef && TII->isAddImmediate (*VLOpDef, OldVLReg) &&
1573
1640
MRI->use_nodbg_empty (OldVLReg))
1574
1641
VLOpDef->eraseFromParent ();
1642
+
1643
+ // NextMI no longer uses OldVLReg so shrink its LiveInterval.
1644
+ if (OldVLReg.isVirtual ())
1645
+ LIS->shrinkToUses (&LIS->getInterval (OldVLReg));
1575
1646
}
1576
1647
MI.setDesc (NextMI->getDesc ());
1577
1648
}
@@ -1584,9 +1655,13 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
1584
1655
Used = getDemanded (MI, MRI, ST);
1585
1656
}
1586
1657
1587
- NumRemovedVSETVL += ToDelete.size ();
1588
- for (auto *MI : ToDelete)
1658
+ NumCoalescedVSETVL += ToDelete.size ();
1659
+ for (auto *MI : ToDelete) {
1660
+ LIS->RemoveMachineInstrFromMaps (*MI);
1589
1661
MI->eraseFromParent ();
1662
+ }
1663
+
1664
+ return !ToDelete.empty ();
1590
1665
}
1591
1666
1592
1667
void RISCVInsertVSETVLI::insertReadVL (MachineBasicBlock &MBB) {
@@ -1661,15 +1736,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1661
1736
for (MachineBasicBlock &MBB : MF)
1662
1737
emitVSETVLIs (MBB);
1663
1738
1664
- // Now that all vsetvlis are explicit, go through and do block local
1665
- // DSE and peephole based demanded fields based transforms. Note that
1666
- // this *must* be done outside the main dataflow so long as we allow
1667
- // any cross block analysis within the dataflow. We can't have both
1668
- // demanded fields based mutation and non-local analysis in the
1669
- // dataflow at the same time without introducing inconsistencies.
1670
- for (MachineBasicBlock &MBB : MF)
1671
- doLocalPostpass (MBB);
1672
-
1673
1739
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
1674
1740
// of VLEFF/VLSEGFF.
1675
1741
for (MachineBasicBlock &MBB : MF)
@@ -1683,3 +1749,29 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1683
1749
/// Returns a newly heap-allocated instance of the RISCVInsertVSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  FunctionPass *InsertPass = new RISCVInsertVSETVLI();
  return InsertPass;
}
1752
+
1753
+ // Now that all vsetvlis are explicit, go through and do block local
1754
+ // DSE and peephole based demanded fields based transforms. Note that
1755
+ // this *must* be done outside the main dataflow so long as we allow
1756
+ // any cross block analysis within the dataflow. We can't have both
1757
+ // demanded fields based mutation and non-local analysis in the
1758
+ // dataflow at the same time without introducing inconsistencies.
1759
+ bool RISCVCoalesceVSETVLI::runOnMachineFunction (MachineFunction &MF) {
1760
+ // Skip if the vector extension is not enabled.
1761
+ ST = &MF.getSubtarget <RISCVSubtarget>();
1762
+ if (!ST->hasVInstructions ())
1763
+ return false ;
1764
+ TII = ST->getInstrInfo ();
1765
+ MRI = &MF.getRegInfo ();
1766
+ LIS = &getAnalysis<LiveIntervals>();
1767
+
1768
+ bool Changed = false ;
1769
+ for (MachineBasicBlock &MBB : MF)
1770
+ Changed |= coalesceVSETVLIs (MBB);
1771
+
1772
+ return Changed;
1773
+ }
1774
+
1775
+ FunctionPass *llvm::createRISCVCoalesceVSETVLIPass () {
1776
+ return new RISCVCoalesceVSETVLI ();
1777
+ }
0 commit comments