Skip to content
Merged
61 changes: 25 additions & 36 deletions llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
Expand Down Expand Up @@ -56,6 +56,10 @@ class RISCVVLOptimizer : public MachineFunctionPass {
std::optional<MachineOperand> checkUsers(MachineInstr &MI);
bool tryReduceVL(MachineInstr &MI);
bool isCandidate(const MachineInstr &MI) const;

/// For a given instruction, records what elements of it are demanded by
/// downstream users.
DenseMap<const MachineInstr *, std::optional<MachineOperand>> DemandedVLs;
};

} // end anonymous namespace
Expand Down Expand Up @@ -1201,14 +1205,19 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
// Looking for an immediate or a register VL that isn't X0.
assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
"Did not expect X0 VL");

// If we know the demanded VL of UserMI, then we can reduce the VL it
// requires.
if (auto DemandedVL = DemandedVLs[&UserMI]) {
assert(isCandidate(UserMI));
if (RISCV::isVLKnownLE(*DemandedVL, VLOp))
return DemandedVL;
}

return VLOp;
}

std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
// FIXME: Avoid visiting each user for each time we visit something on the
// worklist, combined with an extra visit from the outer loop. Restructure
// along lines of an instcombine style worklist which integrates the outer
// pass.
std::optional<MachineOperand> CommonVL;
for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
const MachineInstr &UserMI = *UserOp.getParent();
Expand Down Expand Up @@ -1285,7 +1294,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) {
return false;
}

auto CommonVL = checkUsers(MI);
auto CommonVL = DemandedVLs[&MI];
if (!CommonVL)
return false;

Expand Down Expand Up @@ -1333,29 +1342,19 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (!ST.hasVInstructions())
return false;

SetVector<MachineInstr *> Worklist;
auto PushOperands = [this, &Worklist](MachineInstr &MI,
bool IgnoreSameBlock) {
for (auto &Op : MI.operands()) {
if (!Op.isReg() || !Op.isUse() || !Op.getReg().isVirtual() ||
!isVectorRegClass(Op.getReg(), MRI))
continue;

MachineInstr *DefMI = MRI->getVRegDef(Op.getReg());
if (!isCandidate(*DefMI))
continue;

if (IgnoreSameBlock && DefMI->getParent() == MI.getParent())
// For each instruction that defines a vector, compute what VL its
// downstream users demand.
for (MachineBasicBlock *MBB : post_order(&MF)) {
assert(MDT->isReachableFromEntry(MBB));
for (MachineInstr &MI : reverse(*MBB)) {
if (!isCandidate(MI))
continue;

Worklist.insert(DefMI);
DemandedVLs.insert({&MI, checkUsers(MI)});
}
};
}

// Do a first pass eagerly rewriting in roughly reverse instruction
// order, populate the worklist with any instructions we might need to
// revisit. We avoid adding definitions to the worklist if they're
// in the same block - we're about to visit them anyways.
// Then go through and see if we can reduce the VL of any instructions to
// only what's demanded.
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
// Avoid unreachable blocks as they have degenerate dominance
Expand All @@ -1368,18 +1367,8 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (!tryReduceVL(MI))
continue;
MadeChange = true;
PushOperands(MI, /*IgnoreSameBlock*/ true);
}
}

while (!Worklist.empty()) {
assert(MadeChange);
MachineInstr &MI = *Worklist.pop_back_val();
assert(isCandidate(MI));
if (!tryReduceVL(MI))
continue;
PushOperands(MI, /*IgnoreSameBlock*/ false);
}

return MadeChange;
}
22 changes: 18 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,11 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: %a1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: %a2:vr = PseudoVADD_VV_M1 $noreg, %a1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: $v8 = COPY %a2
; CHECK-NEXT: PseudoRET
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: %b1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: %b2:vr = PseudoVADD_VV_M1 $noreg, %b1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: $v8 = COPY %b2
; CHECK-NEXT: PseudoRET
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
Expand All @@ -183,15 +181,31 @@ body: |
bb.1:
%a1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
%a2:vr = PseudoVADD_VV_M1 $noreg, %a1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
$v8 = COPY %a2
PseudoRET
bb.2:
%b1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
%b2:vr = PseudoVADD_VV_M1 $noreg, %b1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
$v8 = COPY %b2
PseudoRET
bb.3:
liveins: $x1
%c:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
BEQ $x1, $x0, %bb.1
PseudoBR %bb.2
...
---
name: unreachable
body: |
; CHECK-LABEL: name: unreachable
; CHECK: bb.0:
; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: PseudoRET
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: PseudoRET
bb.0:
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
PseudoRET
bb.1:
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
PseudoRET
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@

define <vscale x 4 x i32> @same_vl_imm(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK: User VL is: 4
; CHECK-NEXT: Abort due to CommonVL == VLOp, no point in reducing.
; CHECK: Abort due to CommonVL == VLOp, no point in reducing.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why drop NEXT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tryReduceVL is no longer called immediately after the checkUsers, so the CHECK-NEXT failed

%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 4)
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, i64 4)
ret <vscale x 4 x i32> %w
}

define <vscale x 4 x i32> @same_vl_reg(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %vl) {
; CHECK: User VL is: %3:gprnox0
; CHECK-NEXT: Abort due to CommonVL == VLOp, no point in reducing.
; CHECK: Abort due to CommonVL == VLOp, no point in reducing.
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %vl)
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, i64 %vl)
ret <vscale x 4 x i32> %w
Expand Down