Skip to content

Commit a74ee80

Browse files
committed
[RISCV] Support post-regalloc vsetvli pass
1 parent b2d7d72 commit a74ee80

File tree

242 files changed

+9762
-8889
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

242 files changed

+9762
-8889
lines changed

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ static cl::opt<bool> EnableMISchedLoadClustering(
9696
cl::desc("Enable load clustering in the machine scheduler"),
9797
cl::init(false));
9898

99+
static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
100+
"riscv-vsetvli-after-rvv-regalloc", cl::Hidden,
101+
cl::desc("vsetvl insertion after rvv regalloc"), cl::init(true));
102+
99103
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
100104
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
101105
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -389,6 +393,8 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
389393

390394
bool RISCVPassConfig::addRegAssignAndRewriteFast() {
391395
addPass(createRVVRegAllocPass(false));
396+
if (EnableVSETVLIAfterRVVRegAlloc)
397+
addPass(createRISCVInsertVSETVLIPass());
392398
addPass(createRISCVCoalesceVSETVLIPass());
393399
if (TM->getOptLevel() != CodeGenOptLevel::None &&
394400
EnableRISCVDeadRegisterElimination)
@@ -399,6 +405,8 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
399405
bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
400406
addPass(createRVVRegAllocPass(true));
401407
addPass(createVirtRegRewriter(false));
408+
if (EnableVSETVLIAfterRVVRegAlloc)
409+
addPass(createRISCVInsertVSETVLIPass());
402410
addPass(createRISCVCoalesceVSETVLIPass());
403411
if (TM->getOptLevel() != CodeGenOptLevel::None &&
404412
EnableRISCVDeadRegisterElimination)
@@ -547,10 +555,12 @@ void RISCVPassConfig::addPreRegAlloc() {
547555

548556
// Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after
549557
// register coalescing so needVSETVLIPHI doesn't need to look through COPYs.
550-
if (TM->getOptLevel() == CodeGenOptLevel::None)
551-
insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
552-
else
553-
insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
558+
if (!EnableVSETVLIAfterRVVRegAlloc) {
559+
if (TM->getOptLevel() == CodeGenOptLevel::None)
560+
insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
561+
else
562+
insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
563+
}
554564
}
555565

556566
void RISCVPassConfig::addFastRegAlloc() {

llvm/test/CodeGen/RISCV/O0-pipeline.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,12 @@
4444
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
4545
; CHECK-NEXT: Init Undef Pass
4646
; CHECK-NEXT: Eliminate PHI nodes for register allocation
47-
; CHECK-NEXT: MachineDominator Tree Construction
48-
; CHECK-NEXT: Slot index numbering
49-
; CHECK-NEXT: Live Interval Analysis
50-
; CHECK-NEXT: RISC-V Insert VSETVLI pass
5147
; CHECK-NEXT: Two-Address instruction pass
5248
; CHECK-NEXT: Fast Register Allocator
49+
; CHECK-NEXT: MachineDominator Tree Construction
5350
; CHECK-NEXT: Slot index numbering
5451
; CHECK-NEXT: Live Interval Analysis
52+
; CHECK-NEXT: RISC-V Insert VSETVLI pass
5553
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
5654
; CHECK-NEXT: Fast Register Allocator
5755
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis

llvm/test/CodeGen/RISCV/O3-pipeline.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@
128128
; CHECK-NEXT: Slot index numbering
129129
; CHECK-NEXT: Live Interval Analysis
130130
; CHECK-NEXT: Register Coalescer
131-
; CHECK-NEXT: RISC-V Insert VSETVLI pass
132131
; CHECK-NEXT: Rename Disconnected Subregister Components
133132
; CHECK-NEXT: Machine Instruction Scheduler
134133
; CHECK-NEXT: Machine Block Frequency Analysis
@@ -142,6 +141,7 @@
142141
; CHECK-NEXT: Machine Optimization Remark Emitter
143142
; CHECK-NEXT: Greedy Register Allocator
144143
; CHECK-NEXT: Virtual Register Rewriter
144+
; CHECK-NEXT: RISC-V Insert VSETVLI pass
145145
; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
146146
; CHECK-NEXT: RISC-V Dead register definitions
147147
; CHECK-NEXT: Virtual Register Map

llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,36 +24,36 @@ define void @_Z3foov() {
2424
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
2525
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
2626
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
27-
; CHECK-NEXT: vle16.v v8, (a0)
27+
; CHECK-NEXT: vle16.v v10, (a0)
2828
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48)
2929
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48)
30-
; CHECK-NEXT: vle8.v v10, (a0)
30+
; CHECK-NEXT: vle8.v v8, (a0)
3131
; CHECK-NEXT: csrr a0, vlenb
3232
; CHECK-NEXT: slli a0, a0, 3
3333
; CHECK-NEXT: add a0, sp, a0
3434
; CHECK-NEXT: addi a0, a0, 16
35-
; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill
35+
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
3636
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46)
3737
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46)
38-
; CHECK-NEXT: vle16.v v10, (a0)
38+
; CHECK-NEXT: vle16.v v12, (a0)
3939
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45)
4040
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
41-
; CHECK-NEXT: vle16.v v12, (a0)
41+
; CHECK-NEXT: vle16.v v14, (a0)
4242
; CHECK-NEXT: addi a0, sp, 16
4343
; CHECK-NEXT: csrr a1, vlenb
4444
; CHECK-NEXT: slli a1, a1, 1
45-
; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
46-
; CHECK-NEXT: add a0, a0, a1
4745
; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
4846
; CHECK-NEXT: add a0, a0, a1
4947
; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
5048
; CHECK-NEXT: add a0, a0, a1
5149
; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill
50+
; CHECK-NEXT: add a0, a0, a1
51+
; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill
5252
; CHECK-NEXT: #APP
5353
; CHECK-NEXT: #NO_APP
54-
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
5554
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
5655
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40)
56+
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
5757
; CHECK-NEXT: vle16.v v8, (a0)
5858
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44)
5959
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44)
@@ -71,12 +71,12 @@ define void @_Z3foov() {
7171
; CHECK-NEXT: lui a0, 1048572
7272
; CHECK-NEXT: addi a0, a0, 928
7373
; CHECK-NEXT: vmsbc.vx v0, v8, a0
74-
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
7574
; CHECK-NEXT: csrr a0, vlenb
7675
; CHECK-NEXT: slli a0, a0, 3
7776
; CHECK-NEXT: add a0, sp, a0
7877
; CHECK-NEXT: addi a0, a0, 16
7978
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
79+
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
8080
; CHECK-NEXT: vsext.vf2 v10, v8, v0.t
8181
; CHECK-NEXT: lui a0, %hi(var_47)
8282
; CHECK-NEXT: addi a0, a0, %lo(var_47)

llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,18 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
7575
; RV32-NEXT: sw a0, 16(sp)
7676
; RV32-NEXT: addi a2, sp, 16
7777
; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
78-
; RV32-NEXT: vlse64.v v16, (a2), zero
79-
; RV32-NEXT: vid.v v8
78+
; RV32-NEXT: vlse64.v v8, (a2), zero
79+
; RV32-NEXT: vid.v v16
8080
; RV32-NEXT: li a2, -1
81-
; RV32-NEXT: vmadd.vx v8, a2, v16
82-
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
81+
; RV32-NEXT: vmadd.vx v16, a2, v8
8382
; RV32-NEXT: addi a2, sp, 32
84-
; RV32-NEXT: vl2r.v v16, (a2) # Unknown-size Folded Reload
85-
; RV32-NEXT: vmsne.vi v0, v16, 0
83+
; RV32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload
84+
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
85+
; RV32-NEXT: vmsne.vi v0, v8, 0
8686
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
87-
; RV32-NEXT: vmv.v.i v16, 0
88-
; RV32-NEXT: vmerge.vim v16, v16, -1, v0
89-
; RV32-NEXT: vand.vv v8, v8, v16
87+
; RV32-NEXT: vmv.v.i v8, 0
88+
; RV32-NEXT: vmerge.vim v8, v8, -1, v0
89+
; RV32-NEXT: vand.vv v8, v16, v8
9090
; RV32-NEXT: vredmaxu.vs v8, v8, v8
9191
; RV32-NEXT: vmv.x.s a2, v8
9292
; RV32-NEXT: sltu a3, a0, a2
@@ -108,15 +108,15 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
108108
; RV64: # %bb.0:
109109
; RV64-NEXT: csrr a0, vlenb
110110
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
111-
; RV64-NEXT: vmv.v.x v24, a0
112-
; RV64-NEXT: vid.v v16
111+
; RV64-NEXT: vmv.v.x v16, a0
112+
; RV64-NEXT: vid.v v24
113113
; RV64-NEXT: li a1, -1
114-
; RV64-NEXT: vmadd.vx v16, a1, v24
114+
; RV64-NEXT: vmadd.vx v24, a1, v16
115115
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
116116
; RV64-NEXT: vmsne.vi v0, v8, 0
117117
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
118118
; RV64-NEXT: vmv.v.i v8, 0
119-
; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
119+
; RV64-NEXT: vmerge.vvm v8, v8, v24, v0
120120
; RV64-NEXT: vredmaxu.vs v8, v8, v8
121121
; RV64-NEXT: vmv.x.s a1, v8
122122
; RV64-NEXT: sub a0, a0, a1

0 commit comments

Comments
 (0)