Skip to content

[RISCV] Support postRA vsetvl insertion pass #70549

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ static cl::opt<bool> EnableMISchedLoadClustering(
cl::desc("Enable load clustering in the machine scheduler"),
cl::init(false));

// Hidden command-line switch (default: true) selecting where the RISC-V Insert
// VSETVLI pass is scheduled. When set, the pass is added after RVV register
// allocation in both the fast and the optimized register-assignment paths (in
// the optimized path, after the virtual register rewriter as well). When
// cleared, the pass is instead inserted before register allocation: after PHI
// elimination when the opt level is None, after register coalescing otherwise.
static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
"riscv-vsetvl-after-rvv-regalloc", cl::Hidden,
cl::desc("Insert vsetvls after vector register allocation"),
cl::init(true));

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
Expand Down Expand Up @@ -389,6 +394,8 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {

bool RISCVPassConfig::addRegAssignAndRewriteFast() {
addPass(createRVVRegAllocPass(false));
if (EnableVSETVLIAfterRVVRegAlloc)
addPass(createRISCVInsertVSETVLIPass());
addPass(createRISCVCoalesceVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
Expand All @@ -399,6 +406,8 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
addPass(createRVVRegAllocPass(true));
addPass(createVirtRegRewriter(false));
if (EnableVSETVLIAfterRVVRegAlloc)
addPass(createRISCVInsertVSETVLIPass());
addPass(createRISCVCoalesceVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
Expand Down Expand Up @@ -547,10 +556,12 @@ void RISCVPassConfig::addPreRegAlloc() {

// Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after
// register coalescing so needVSETVLIPHI doesn't need to look through COPYs.
if (TM->getOptLevel() == CodeGenOptLevel::None)
insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
else
insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
if (!EnableVSETVLIAfterRVVRegAlloc) {
if (TM->getOptLevel() == CodeGenOptLevel::None)
insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
else
insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
}
}

void RISCVPassConfig::addFastRegAlloc() {
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/RISCV/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,12 @@
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@
; CHECK-NEXT: Slot index numbering
; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: Register Coalescer
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Rename Disconnected Subregister Components
; CHECK-NEXT: Machine Instruction Scheduler
; CHECK-NEXT: Machine Block Frequency Analysis
Expand All @@ -142,6 +141,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Greedy Register Allocator
; CHECK-NEXT: Virtual Register Rewriter
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: RISC-V Dead register definitions
; CHECK-NEXT: Virtual Register Map
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,36 +24,36 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48)
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46)
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40)
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44)
Expand All @@ -71,12 +71,12 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, 1048572
; CHECK-NEXT: addi a0, a0, 928
; CHECK-NEXT: vmsbc.vx v0, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
; CHECK-NEXT: vsext.vf2 v10, v8, v0.t
; CHECK-NEXT: lui a0, %hi(var_47)
; CHECK-NEXT: addi a0, a0, %lo(var_47)
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,18 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
; RV32-NEXT: sw a0, 16(sp)
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: vid.v v8
; RV32-NEXT: vlse64.v v8, (a2), zero
; RV32-NEXT: vid.v v16
; RV32-NEXT: li a2, -1
; RV32-NEXT: vmadd.vx v8, a2, v16
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vmadd.vx v16, a2, v8
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: vl2r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vmsne.vi v0, v16, 0
; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT: vmsne.vi v0, v8, 0
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vmv.v.i v16, 0
; RV32-NEXT: vmerge.vim v16, v16, -1, v0
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: vmerge.vim v8, v8, -1, v0
; RV32-NEXT: vand.vv v8, v16, v8
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vmv.x.s a2, v8
; RV32-NEXT: sltu a3, a0, a2
Expand All @@ -108,15 +108,15 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
; RV64: # %bb.0:
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vmv.v.x v24, a0
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmv.v.x v16, a0
; RV64-NEXT: vid.v v24
; RV64-NEXT: li a1, -1
; RV64-NEXT: vmadd.vx v16, a1, v24
; RV64-NEXT: vmadd.vx v24, a1, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
; RV64-NEXT: vmerge.vvm v8, v8, v24, v0
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: sub a0, a0, a1
Expand Down
Loading
Loading