Skip to content

[RISCV] Separate doLocalPostpass into new pass and move to post vector regalloc #88295

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCV.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertVSETVLIPass();
void initializeRISCVInsertVSETVLIPass(PassRegistry &);

FunctionPass *createRISCVCoalesceVSETVLIPass();
void initializeRISCVCoalesceVSETVLIPass(PassRegistry &);

FunctionPass *createRISCVPostRAExpandPseudoPass();
void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertReadWriteCSRPass();
Expand Down
126 changes: 109 additions & 17 deletions llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,19 @@
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
Expand Down Expand Up @@ -190,6 +193,11 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
if (UseMO.getReg() == RISCV::NoRegister)
return true;

if (UseMO.isUndef())
return true;
if (UseMO.getReg().isPhysical())
return false;

if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
if (UseMI->isImplicitDef())
return true;
Expand Down Expand Up @@ -778,18 +786,52 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
VSETVLIInfo &Info) const;
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
void doLocalPostpass(MachineBasicBlock &MBB);
void doPRE(MachineBasicBlock &MBB);
void insertReadVL(MachineBasicBlock &MBB);
};

class RISCVCoalesceVSETVLI : public MachineFunctionPass {
public:
static char ID;
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;

RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();

AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveDebugVariables>();
AU.addPreserved<LiveStacks>();

MachineFunctionPass::getAnalysisUsage(AU);
}

StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }

private:
bool coalesceVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
false, false)

char RISCVCoalesceVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
RISCV_COALESCE_VSETVLI_NAME, false, false)

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
Expand Down Expand Up @@ -1511,12 +1553,12 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,

auto &AVL = MI.getOperand(1);
auto &PrevAVL = PrevMI.getOperand(1);
assert(MRI.isSSA());

// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
// For now just check that PrevMI uses the same virtual register.
if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
(!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()))
(!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
PrevAVL.getReg() != AVL.getReg()))
return false;
}

Expand All @@ -1526,7 +1568,7 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
// must be considered demanded.
Expand Down Expand Up @@ -1558,20 +1600,49 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {

if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
if (!isVLPreservingConfig(*NextMI)) {
MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
Register DefReg = NextMI->getOperand(0).getReg();

MI.getOperand(0).setReg(DefReg);
MI.getOperand(0).setIsDead(false);

// The def of DefReg moved to MI, so extend the LiveInterval up to
// it.
if (DefReg.isVirtual()) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
DefLI.addSegment(S);
DefVNI->def = MISlot;
// Mark DefLI as spillable if it was previously unspillable
DefLI.setWeight(0);

// DefReg may have had no uses, in which case we need to shrink
// the LiveInterval up to MI.
LIS->shrinkToUses(&DefLI);
}

Register OldVLReg;
if (MI.getOperand(1).isReg())
OldVLReg = MI.getOperand(1).getReg();
if (NextMI->getOperand(1).isImm())
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
else
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);

// Clear NextMI's AVL early so we're not counting it as a use.
if (NextMI->getOperand(1).isReg())
NextMI->getOperand(1).setReg(RISCV::NoRegister);

if (OldVLReg) {
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
MRI->use_nodbg_empty(OldVLReg))
VLOpDef->eraseFromParent();

// NextMI no longer uses OldVLReg so shrink its LiveInterval.
if (OldVLReg.isVirtual())
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
}
MI.setDesc(NextMI->getDesc());
}
Expand All @@ -1584,9 +1655,13 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
Used = getDemanded(MI, MRI, ST);
}

NumRemovedVSETVL += ToDelete.size();
for (auto *MI : ToDelete)
NumCoalescedVSETVL += ToDelete.size();
for (auto *MI : ToDelete) {
LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
}

return !ToDelete.empty();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
Expand Down Expand Up @@ -1661,15 +1736,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
emitVSETVLIs(MBB);

// Now that all vsetvlis are explicit, go through and do block local
// DSE and peephole based demanded fields based transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
for (MachineBasicBlock &MBB : MF)
doLocalPostpass(MBB);

// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
// of VLEFF/VLSEGFF.
for (MachineBasicBlock &MBB : MF)
Expand All @@ -1683,3 +1749,29 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
return new RISCVInsertVSETVLI();
}

// Now that all vsetvlis are explicit, go through and do block local
// DSE and peephole based demanded fields based transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
ST = &MF.getSubtarget<RISCVSubtarget>();
if (!ST->hasVInstructions())
return false;
TII = ST->getInstrInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();

bool Changed = false;
for (MachineBasicBlock &MBB : MF)
Changed |= coalesceVSETVLIs(MBB);

return Changed;
}

FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
return new RISCVCoalesceVSETVLI();
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVFoldMasksPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
initializeRISCVCoalesceVSETVLIPass(*PR);
initializeRISCVInsertReadWriteCSRPass(*PR);
initializeRISCVInsertWriteVXRMPass(*PR);
initializeRISCVDAGToDAGISelPass(*PR);
Expand Down Expand Up @@ -388,12 +389,14 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {

bool RISCVPassConfig::addRegAssignAndRewriteFast() {
addPass(createRVVRegAllocPass(false));
addPass(createRISCVCoalesceVSETVLIPass());
return TargetPassConfig::addRegAssignAndRewriteFast();
}

bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
addPass(createRVVRegAllocPass(true));
addPass(createVirtRegRewriter(false));
addPass(createRISCVCoalesceVSETVLIPass());
return TargetPassConfig::addRegAssignAndRewriteOptimized();
}

Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/RISCV/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
; CHECK-NEXT: Fixup Statepoint Caller Saved
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Greedy Register Allocator
; CHECK-NEXT: Virtual Register Rewriter
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Virtual Register Map
; CHECK-NEXT: Live Register Matrix
; CHECK-NEXT: Greedy Register Allocator
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x float> poison, float %e0, i64 0
Expand Down Expand Up @@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
Expand All @@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa5, fa5, fa4
; RV32-NEXT: fcvt.w.d a2, fa5, rtz
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
Expand Down Expand Up @@ -496,8 +496,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmin.d fa5, fa5, fa4
; RV64-NEXT: fcvt.l.d a2, fa5, rtz
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
Expand Down Expand Up @@ -580,8 +580,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa5, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
Expand Down Expand Up @@ -656,8 +656,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa5, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
Expand All @@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
Expand Down Expand Up @@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; V128-NEXT: vid.v v8
; V128-NEXT: vsrl.vi v8, v8, 1
; V128-NEXT: vmv.v.i v0, 10
; V128-NEXT: vadd.vi v8, v8, 1
; V128-NEXT: vmv.v.i v0, 10
; V128-NEXT: vrgather.vv v10, v9, v8, v0.t
; V128-NEXT: vmv.v.v v8, v10
; V128-NEXT: ret
Expand All @@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu
; V512-NEXT: vid.v v8
; V512-NEXT: vsrl.vi v8, v8, 1
; V512-NEXT: vmv.v.i v0, 10
; V512-NEXT: vadd.vi v8, v8, 1
; V512-NEXT: vmv.v.i v0, 10
; V512-NEXT: vrgather.vv v10, v9, v8, v0.t
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
Expand Down
Loading
Loading