-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[RISCV][VLOpt] Reorganize visit order and worklist management #123973
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV][VLOpt] Reorganize visit order and worklist management #123973
Conversation
This implements a suggestion by Craig in PR llvm#123878. We can move the worklist management out of the per-instruction work and do it once at the end of scanning all the instructions. This should reduce repeat visitation of the same instruction when no changes can be made. Note that this does not remove the inherent O(N^2) in the algorithm. We're still potentially visiiting every user of every def. I also included a guard for unreachable blocks since that had been mentioned as a possible cause. It seems we've rulled that out, but guarding for this case is still a good idea.
@llvm/pr-subscribers-backend-risc-v Author: Philip Reames (preames) ChangesThis implements a suggestion by Craig in PR #123878. We can move the worklist management out of the per-instruction work and do it once at the end of scanning all the instructions. This should reduce repeat visitation of the same instruction when no changes can be made. Note that this does not remove the inherent O(N^2) in the algorithm. We're still potentially visiiting every user of every def. I also included a guard for unreachable blocks since that had been mentioned as a possible cause. It seems we've rulled that out, but guarding for this case is still a good idea. Full diff: https://github.com/llvm/llvm-project/pull/123973.diff 1 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 54ca8ccd8d9e90..66d26bf5b11e2d 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1292,53 +1292,60 @@ std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
return CommonVL;
}
-bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
- SetVector<MachineInstr *> Worklist;
- Worklist.insert(&OrigMI);
+bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) {
+ LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n");
- bool MadeChange = false;
- while (!Worklist.empty()) {
- MachineInstr &MI = *Worklist.pop_back_val();
- LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n");
+ if (!isVectorRegClass(MI.getOperand(0).getReg(), MRI))
+ return false;
- if (!isVectorRegClass(MI.getOperand(0).getReg(), MRI))
- continue;
+ auto CommonVL = checkUsers(MI);
+ if (!CommonVL)
+ return false;
- auto CommonVL = checkUsers(MI);
- if (!CommonVL)
- continue;
+ assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&
+ "Expected VL to be an Imm or virtual Reg");
- assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&
- "Expected VL to be an Imm or virtual Reg");
+ unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
+ MachineOperand &VLOp = MI.getOperand(VLOpNum);
- unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
- MachineOperand &VLOp = MI.getOperand(VLOpNum);
+ if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) {
+ LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
+ return false;
+ }
- if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) {
- LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
- continue;
- }
+ if (CommonVL->isImm()) {
+ LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
+ << CommonVL->getImm() << " for " << MI << "\n");
+ VLOp.ChangeToImmediate(CommonVL->getImm());
+ return true;
+ }
+ const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
+ if (!MDT->dominates(VLMI, &MI))
+ return false;
+ LLVM_DEBUG(
+ dbgs() << " Reduce VL from " << VLOp << " to "
+ << printReg(CommonVL->getReg(), MRI->getTargetRegisterInfo())
+ << " for " << MI << "\n");
- if (CommonVL->isImm()) {
- LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
- << CommonVL->getImm() << " for " << MI << "\n");
- VLOp.ChangeToImmediate(CommonVL->getImm());
- } else {
- const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
- if (!MDT->dominates(VLMI, &MI))
- continue;
- LLVM_DEBUG(
- dbgs() << " Reduce VL from " << VLOp << " to "
- << printReg(CommonVL->getReg(), MRI->getTargetRegisterInfo())
- << " for " << MI << "\n");
+ // All our checks passed. We can reduce VL.
+ VLOp.ChangeToRegister(CommonVL->getReg(), false);
+ return true;
+}
- // All our checks passed. We can reduce VL.
- VLOp.ChangeToRegister(CommonVL->getReg(), false);
- }
+bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ MRI = &MF.getRegInfo();
+ MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
- MadeChange = true;
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ if (!ST.hasVInstructions())
+ return false;
- // Now add all inputs to this instruction to the worklist.
+ SetVector<MachineInstr *> Worklist;
+ auto PushOperands = [this, &Worklist](MachineInstr &MI,
+ bool IgnoreSameBlock) {
for (auto &Op : MI.operands()) {
if (!Op.isReg() || !Op.isUse() || !Op.getReg().isVirtual())
continue;
@@ -1351,34 +1358,40 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &OrigMI) {
if (!isCandidate(*DefMI))
continue;
+ if (IgnoreSameBlock && DefMI->getParent() == MI.getParent())
+ continue;
+
Worklist.insert(DefMI);
}
- }
-
- return MadeChange;
-}
-
-bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
- return false;
-
- MRI = &MF.getRegInfo();
- MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
-
- const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
- if (!ST.hasVInstructions())
- return false;
+ };
+ // Do a first pass eagerly rewriting in roughly reverse instruction
+ // order, populate the worklist with any instructions we might need to
+ // revisit. We avoid adding definitions to the worklist if they're
+ // in the same block - we're about to visit them anyways.
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
- // Visit instructions in reverse order.
+ // Avoid unreachable blocks as they have degenerate dominance
+ if (!MDT->isReachableFromEntry(&MBB))
+ continue;
+
for (auto &MI : make_range(MBB.rbegin(), MBB.rend())) {
if (!isCandidate(MI))
continue;
-
- MadeChange |= tryReduceVL(MI);
+ if (!tryReduceVL(MI))
+ continue;
+ MadeChange = true;
+ PushOperands(MI, /*IgnoreSameBlock*/ true);
}
}
+ while (!Worklist.empty()) {
+ assert(MadeChange);
+ MachineInstr &MI = *Worklist.pop_back_val();
+ if (!tryReduceVL(MI))
+ continue;
+ PushOperands(MI, /*IgnoreSameBlock*/ false);
+ }
+
return MadeChange;
}
|
while (!Worklist.empty()) { | ||
MachineInstr &MI = *Worklist.pop_back_val(); | ||
LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n"); | ||
if (!isVectorRegClass(MI.getOperand(0).getReg(), MRI)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need this since we skip adding it to the worklist in PushOperands?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a bit more cleanup on some the worklist invariants here. I'm planning a separate patch. (edit: typo)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
In retrospect, this probably should have been rolled into llvm#123973. It seemed more involved when I first decided to split. :)
In retrospect, this probably should have been rolled into #123973. It seemed more involved when I first decided to split. :)
This implements a suggestion by Craig in PR #123878. We can move the worklist management out of the per-instruction work and do it once at the end of scanning all the instructions. This should reduce repeat visitation of the same instruction when no changes can be made.
Note that this does not remove the inherent O(N^2) in the algorithm. We're still potentially visiiting every user of every def.
I also included a guard for unreachable blocks since that had been mentioned as a possible cause. It seems we've rulled that out, but guarding for this case is still a good idea.