Skip to content

Commit 675e7bd

Browse files
authored
[RISCV] Support postRA vsetvl insertion pass (#70549)
This patch tries to get rid of the implicit vl/vtype def-use chain of vsetvl and to improve register allocation quality by moving the vsetvl insertion pass after RVV register allocation. This enables the following optimizations: 1. Unblock the scheduler's constraints by removing the vl/vtype def-use chain. 2. Support RVV re-materialization. 3. Support partial spill. This patch adds a new option `-riscv-vsetvl-after-rvv-regalloc=<1|0>` to control this feature, which is enabled by default (`cl::init(true)`).
1 parent 9c78481 commit 675e7bd

File tree

242 files changed

+9721
-8870
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

242 files changed

+9721
-8870
lines changed

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ static cl::opt<bool> EnableMISchedLoadClustering(
9696
cl::desc("Enable load clustering in the machine scheduler"),
9797
cl::init(false));
9898

99+
static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
100+
"riscv-vsetvl-after-rvv-regalloc", cl::Hidden,
101+
cl::desc("Insert vsetvls after vector register allocation"),
102+
cl::init(true));
103+
99104
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
100105
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
101106
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -389,6 +394,8 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
389394

390395
bool RISCVPassConfig::addRegAssignAndRewriteFast() {
391396
addPass(createRVVRegAllocPass(false));
397+
if (EnableVSETVLIAfterRVVRegAlloc)
398+
addPass(createRISCVInsertVSETVLIPass());
392399
addPass(createRISCVCoalesceVSETVLIPass());
393400
if (TM->getOptLevel() != CodeGenOptLevel::None &&
394401
EnableRISCVDeadRegisterElimination)
@@ -399,6 +406,8 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
399406
bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
400407
addPass(createRVVRegAllocPass(true));
401408
addPass(createVirtRegRewriter(false));
409+
if (EnableVSETVLIAfterRVVRegAlloc)
410+
addPass(createRISCVInsertVSETVLIPass());
402411
addPass(createRISCVCoalesceVSETVLIPass());
403412
if (TM->getOptLevel() != CodeGenOptLevel::None &&
404413
EnableRISCVDeadRegisterElimination)
@@ -547,10 +556,12 @@ void RISCVPassConfig::addPreRegAlloc() {
547556

548557
// Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after
549558
// register coalescing so needVSETVLIPHI doesn't need to look through COPYs.
550-
if (TM->getOptLevel() == CodeGenOptLevel::None)
551-
insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
552-
else
553-
insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
559+
if (!EnableVSETVLIAfterRVVRegAlloc) {
560+
if (TM->getOptLevel() == CodeGenOptLevel::None)
561+
insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
562+
else
563+
insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
564+
}
554565
}
555566

556567
void RISCVPassConfig::addFastRegAlloc() {

llvm/test/CodeGen/RISCV/O0-pipeline.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,12 @@
4444
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
4545
; CHECK-NEXT: Init Undef Pass
4646
; CHECK-NEXT: Eliminate PHI nodes for register allocation
47-
; CHECK-NEXT: MachineDominator Tree Construction
48-
; CHECK-NEXT: Slot index numbering
49-
; CHECK-NEXT: Live Interval Analysis
50-
; CHECK-NEXT: RISC-V Insert VSETVLI pass
5147
; CHECK-NEXT: Two-Address instruction pass
5248
; CHECK-NEXT: Fast Register Allocator
49+
; CHECK-NEXT: MachineDominator Tree Construction
5350
; CHECK-NEXT: Slot index numbering
5451
; CHECK-NEXT: Live Interval Analysis
52+
; CHECK-NEXT: RISC-V Insert VSETVLI pass
5553
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
5654
; CHECK-NEXT: Fast Register Allocator
5755
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis

llvm/test/CodeGen/RISCV/O3-pipeline.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@
128128
; CHECK-NEXT: Slot index numbering
129129
; CHECK-NEXT: Live Interval Analysis
130130
; CHECK-NEXT: Register Coalescer
131-
; CHECK-NEXT: RISC-V Insert VSETVLI pass
132131
; CHECK-NEXT: Rename Disconnected Subregister Components
133132
; CHECK-NEXT: Machine Instruction Scheduler
134133
; CHECK-NEXT: Machine Block Frequency Analysis
@@ -142,6 +141,7 @@
142141
; CHECK-NEXT: Machine Optimization Remark Emitter
143142
; CHECK-NEXT: Greedy Register Allocator
144143
; CHECK-NEXT: Virtual Register Rewriter
144+
; CHECK-NEXT: RISC-V Insert VSETVLI pass
145145
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
146146
; CHECK-NEXT: RISC-V Dead register definitions
147147
; CHECK-NEXT: Virtual Register Map

llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,36 +24,36 @@ define void @_Z3foov() {
2424
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
2525
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
2626
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
27-
; CHECK-NEXT: vle16.v v8, (a0)
27+
; CHECK-NEXT: vle16.v v10, (a0)
2828
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48)
2929
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48)
30-
; CHECK-NEXT: vle8.v v10, (a0)
30+
; CHECK-NEXT: vle8.v v8, (a0)
3131
; CHECK-NEXT: csrr a0, vlenb
3232
; CHECK-NEXT: slli a0, a0, 3
3333
; CHECK-NEXT: add a0, sp, a0
3434
; CHECK-NEXT: addi a0, a0, 16
35-
; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill
35+
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
3636
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46)
3737
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46)
38-
; CHECK-NEXT: vle16.v v10, (a0)
38+
; CHECK-NEXT: vle16.v v12, (a0)
3939
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45)
4040
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
41-
; CHECK-NEXT: vle16.v v12, (a0)
41+
; CHECK-NEXT: vle16.v v14, (a0)
4242
; CHECK-NEXT: addi a0, sp, 16
4343
; CHECK-NEXT: csrr a1, vlenb
4444
; CHECK-NEXT: slli a1, a1, 1
45-
; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
46-
; CHECK-NEXT: add a0, a0, a1
4745
; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
4846
; CHECK-NEXT: add a0, a0, a1
4947
; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
5048
; CHECK-NEXT: add a0, a0, a1
5149
; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill
50+
; CHECK-NEXT: add a0, a0, a1
51+
; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill
5252
; CHECK-NEXT: #APP
5353
; CHECK-NEXT: #NO_APP
54-
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
5554
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
5655
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40)
56+
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
5757
; CHECK-NEXT: vle16.v v8, (a0)
5858
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44)
5959
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44)
@@ -71,12 +71,12 @@ define void @_Z3foov() {
7171
; CHECK-NEXT: lui a0, 1048572
7272
; CHECK-NEXT: addi a0, a0, 928
7373
; CHECK-NEXT: vmsbc.vx v0, v8, a0
74-
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
7574
; CHECK-NEXT: csrr a0, vlenb
7675
; CHECK-NEXT: slli a0, a0, 3
7776
; CHECK-NEXT: add a0, sp, a0
7877
; CHECK-NEXT: addi a0, a0, 16
7978
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
79+
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
8080
; CHECK-NEXT: vsext.vf2 v10, v8, v0.t
8181
; CHECK-NEXT: lui a0, %hi(var_47)
8282
; CHECK-NEXT: addi a0, a0, %lo(var_47)

llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,18 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
7575
; RV32-NEXT: sw a0, 16(sp)
7676
; RV32-NEXT: addi a2, sp, 16
7777
; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
78-
; RV32-NEXT: vlse64.v v16, (a2), zero
79-
; RV32-NEXT: vid.v v8
78+
; RV32-NEXT: vlse64.v v8, (a2), zero
79+
; RV32-NEXT: vid.v v16
8080
; RV32-NEXT: li a2, -1
81-
; RV32-NEXT: vmadd.vx v8, a2, v16
82-
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
81+
; RV32-NEXT: vmadd.vx v16, a2, v8
8382
; RV32-NEXT: addi a2, sp, 32
84-
; RV32-NEXT: vl2r.v v16, (a2) # Unknown-size Folded Reload
85-
; RV32-NEXT: vmsne.vi v0, v16, 0
83+
; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
84+
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
85+
; RV32-NEXT: vmsne.vi v0, v8, 0
8686
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
87-
; RV32-NEXT: vmv.v.i v16, 0
88-
; RV32-NEXT: vmerge.vim v16, v16, -1, v0
89-
; RV32-NEXT: vand.vv v8, v8, v16
87+
; RV32-NEXT: vmv.v.i v8, 0
88+
; RV32-NEXT: vmerge.vim v8, v8, -1, v0
89+
; RV32-NEXT: vand.vv v8, v16, v8
9090
; RV32-NEXT: vredmaxu.vs v8, v8, v8
9191
; RV32-NEXT: vmv.x.s a2, v8
9292
; RV32-NEXT: sltu a3, a0, a2
@@ -108,15 +108,15 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
108108
; RV64: # %bb.0:
109109
; RV64-NEXT: csrr a0, vlenb
110110
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
111-
; RV64-NEXT: vmv.v.x v24, a0
112-
; RV64-NEXT: vid.v v16
111+
; RV64-NEXT: vmv.v.x v16, a0
112+
; RV64-NEXT: vid.v v24
113113
; RV64-NEXT: li a1, -1
114-
; RV64-NEXT: vmadd.vx v16, a1, v24
114+
; RV64-NEXT: vmadd.vx v24, a1, v16
115115
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
116116
; RV64-NEXT: vmsne.vi v0, v8, 0
117117
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
118118
; RV64-NEXT: vmv.v.i v8, 0
119-
; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
119+
; RV64-NEXT: vmerge.vvm v8, v8, v24, v0
120120
; RV64-NEXT: vredmaxu.vs v8, v8, v8
121121
; RV64-NEXT: vmv.x.s a1, v8
122122
; RV64-NEXT: sub a0, a0, a1

0 commit comments

Comments
 (0)