Skip to content

[RISCV][VLOPT] Compute demanded VLs up front #124530

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jan 29, 2025
61 changes: 25 additions & 36 deletions llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
Expand Down Expand Up @@ -56,6 +56,10 @@ class RISCVVLOptimizer : public MachineFunctionPass {
std::optional<MachineOperand> checkUsers(MachineInstr &MI);
bool tryReduceVL(MachineInstr &MI);
bool isCandidate(const MachineInstr &MI) const;

/// For a given instruction, records what elements of it are demanded by
/// downstream users.
DenseMap<const MachineInstr *, std::optional<MachineOperand>> DemandedVLs;
};

} // end anonymous namespace
Expand Down Expand Up @@ -1201,14 +1205,19 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
// Looking for an immediate or a register VL that isn't X0.
assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
"Did not expect X0 VL");

// If we know the demanded VL of UserMI, then we can reduce the VL it
// requires.
if (auto DemandedVL = DemandedVLs[&UserMI]) {
assert(isCandidate(UserMI));
if (RISCV::isVLKnownLE(*DemandedVL, VLOp))
return DemandedVL;
}

return VLOp;
}

std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
// FIXME: Avoid visiting each user for each time we visit something on the
// worklist, combined with an extra visit from the outer loop. Restructure
// along lines of an instcombine style worklist which integrates the outer
// pass.
std::optional<MachineOperand> CommonVL;
for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
const MachineInstr &UserMI = *UserOp.getParent();
Expand Down Expand Up @@ -1285,7 +1294,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) {
return false;
}

auto CommonVL = checkUsers(MI);
auto CommonVL = DemandedVLs[&MI];
if (!CommonVL)
return false;

Expand Down Expand Up @@ -1333,29 +1342,19 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (!ST.hasVInstructions())
return false;

SetVector<MachineInstr *> Worklist;
auto PushOperands = [this, &Worklist](MachineInstr &MI,
bool IgnoreSameBlock) {
for (auto &Op : MI.operands()) {
if (!Op.isReg() || !Op.isUse() || !Op.getReg().isVirtual() ||
!isVectorRegClass(Op.getReg(), MRI))
continue;

MachineInstr *DefMI = MRI->getVRegDef(Op.getReg());
if (!isCandidate(*DefMI))
continue;

if (IgnoreSameBlock && DefMI->getParent() == MI.getParent())
// For each instruction that defines a vector, compute what VL its
// downstream users demand.
for (MachineBasicBlock *MBB : post_order(&MF)) {
assert(MDT->isReachableFromEntry(MBB));
for (MachineInstr &MI : reverse(*MBB)) {
if (!isCandidate(MI))
continue;

Worklist.insert(DefMI);
DemandedVLs.insert({&MI, checkUsers(MI)});
}
};
}

// Do a first pass eagerly rewriting in roughly reverse instruction
// order, populate the worklist with any instructions we might need to
// revisit. We avoid adding definitions to the worklist if they're
// in the same block - we're about to visit them anyways.
// Then go through and see if we can reduce the VL of any instructions to
// only what's demanded.
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
// Avoid unreachable blocks as they have degenerate dominance
Expand All @@ -1368,18 +1367,8 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (!tryReduceVL(MI))
continue;
MadeChange = true;
PushOperands(MI, /*IgnoreSameBlock*/ true);
}
}

while (!Worklist.empty()) {
assert(MadeChange);
MachineInstr &MI = *Worklist.pop_back_val();
assert(isCandidate(MI));
if (!tryReduceVL(MI))
continue;
PushOperands(MI, /*IgnoreSameBlock*/ false);
}

return MadeChange;
}
22 changes: 18 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,11 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: %a1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: %a2:vr = PseudoVADD_VV_M1 $noreg, %a1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: $v8 = COPY %a2
; CHECK-NEXT: PseudoRET
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: %b1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: %b2:vr = PseudoVADD_VV_M1 $noreg, %b1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: $v8 = COPY %b2
; CHECK-NEXT: PseudoRET
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
Expand All @@ -183,15 +181,31 @@ body: |
bb.1:
%a1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
%a2:vr = PseudoVADD_VV_M1 $noreg, %a1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
$v8 = COPY %a2
PseudoRET
bb.2:
%b1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
%b2:vr = PseudoVADD_VV_M1 $noreg, %b1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
$v8 = COPY %b2
PseudoRET
bb.3:
liveins: $x1
%c:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
BEQ $x1, $x0, %bb.1
PseudoBR %bb.2
...
---
name: unreachable
body: |
; CHECK-LABEL: name: unreachable
; CHECK: bb.0:
; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: PseudoRET
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: PseudoRET
bb.0:
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
PseudoRET
bb.1:
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
PseudoRET
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@

define <vscale x 4 x i32> @same_vl_imm(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK: User VL is: 4
; CHECK-NEXT: Abort due to CommonVL == VLOp, no point in reducing.
; CHECK: Abort due to CommonVL == VLOp, no point in reducing.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why change `CHECK-NEXT` to a plain `CHECK` here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`tryReduceVL` is no longer called immediately after `checkUsers`, so the `CHECK-NEXT` failed.

%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 4)
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, i64 4)
ret <vscale x 4 x i32> %w
}

define <vscale x 4 x i32> @same_vl_reg(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %vl) {
; CHECK: User VL is: %3:gprnox0
; CHECK-NEXT: Abort due to CommonVL == VLOp, no point in reducing.
; CHECK: Abort due to CommonVL == VLOp, no point in reducing.
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %vl)
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, i64 %vl)
ret <vscale x 4 x i32> %w
Expand Down