Skip to content

Commit 00fabd2

Browse files
[RISCV][RegAlloc] Add getCSRFirstUseCost for RISC-V (#131349)
This is based off of 63efd8e.

The following table shows the percent change to the dynamic instruction count when the function in this patch returns 0 (default) versus other values.

| benchmark | % speedup 1 over 0 | % speedup 4 over 0 | % speedup 16 over 0 | % speedup 64 over 0 | % speedup 128 over 0 |
| --------------- | ---------------------- | --------------------- | --------------------- | -------------------- | -------------------- |
| 500.perlbench_r | 0.001018570165 | 0.001049508358 | 0.001001106529 | 0.03382582818 | 0.03395354577 |
| 502.gcc_r | 0.02850551412 | 0.02170512371 | 0.01453021263 | 0.06011008637 | 0.1215691521 |
| 505.mcf_r | -0.00009506373338 | -0.00009090057642 | -0.0000860991497 | -0.00005027849766 | 0.00001251173791 |
| 520.omnetpp_r | 0.2958940288 | 0.2959715925 | 0.2961141505 | 0.2959823497 | 0.2963124341 |
| 523.xalancbmk_r | -0.0327074721 | -0.01037021046 | -0.3226810542 | 0.02127133714 | 0.02765388389 |
| 525.x264_r | 0.0000001381714403 | -0.00000007041540345 | -0.00000002156399465 | 0.0000002108993364 | 0.0000002463382874 |
| 531.deepsjeng_r | 0.00000000339777238 | 0.000000003874652714 | 0.000000003636212547 | 0.000000003874652714 | 0.000000003159332213 |
| 541.leela_r | 0.0009186059953 | -0.000424159199 | 0.0004984456879 | 0.274948447 | 0.8135521414 |
| 557.xz_r | -0.000000003547118854 | -0.00004896449559 | -0.00004910691576 | -0.0000491109983 | -0.00004895599589 |
| geomean | 0.03265937388 | 0.03424232324 | -0.00107917442 | 0.07629116165 | 0.1439913192 |

The following table shows the percent change to the runtime when the function in this patch returns 0 (default) versus other values.
| benchmark | % speedup 1 over 0 | % speedup 4 over 0 | % speedup 16 over 0 | % speedup 64 over 0 | % speedup 128 over 0 |
| --------------- | ------------------ | ------------------ | ------------------- | ------------------- | ------------------- |
| 500.perlbench_r | 0.1722356761 | 0.2269681109 | 0.2596825578 | 0.361573851 | 1.15041305 |
| 502.gcc_r | -0.548415855 | -0.06187002799 | -0.5553684674 | -0.8876686237 | -0.4668665535 |
| 505.mcf_r | -0.8786414258 | -0.4150938441 | -1.035517726 | -0.1860770377 | -0.01904825648 |
| 520.omnetpp_r | 0.4130256072 | 0.6595976188 | 0.897332171 | 0.6252625622 | 0.3869467278 |
| 523.xalancbmk_r | 1.318132014 | -0.003927574 | 1.025962975 | 1.090320253 | -0.789206202 |
| 525.x264_r | -0.03112871796 | -0.00167557587 | 0.06932423155 | -0.1919840015 | -0.1203585732 |
| 531.deepsjeng_r | -0.259516072 | -0.01973455652 | -0.2723227894 | -0.005417022257 | -0.02222388177 |
| 541.leela_r | -0.3497178495 | -0.3510447393 | 0.1274508001 | 0.6485542452 | 0.2880651727 |
| 557.xz_r | 0.7683565263 | -0.2197509447 | -0.0431183874 | 0.07518130872 | 0.5236853039 |
| geomean | 0.06506952742 | -0.0211865386 | 0.05072694648 | 0.1684530637 | 0.1020533557 |

I chose to set the value to 5 on RISC-V because it improves both the dynamic instruction count (IC) and the runtime, showed good results empirically, and had a similar effect to setting it to higher numbers.

I looked at some diffs, and it seems like this patch leads to two things:

1. Less spilling -- not spilling the CSR led to better register allocation and helped us avoid spills down the line.
2. Avoiding spilling the CSR, but spilling more on paths that static heuristics estimate as cold.
1 parent 221b011 commit 00fabd2

File tree

3 files changed

+119
-3
lines changed

3 files changed

+119
-3
lines changed

llvm/lib/CodeGen/RegAllocGreedy.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -2375,10 +2375,12 @@ void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
23752375
}
23762376

23772377
void RAGreedy::initializeCSRCost() {
2378-
// We use the larger one out of the command-line option and the value report
2379-
// by TRI.
2378+
// We use the command-line option if it is explicitly set, otherwise use the
2379+
// larger one out of the command-line option and the value reported by TRI.
23802380
CSRCost = BlockFrequency(
2381-
std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2381+
CSRFirstTimeCost.getNumOccurrences()
2382+
? CSRFirstTimeCost
2383+
: std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
23822384
if (!CSRCost.getFrequency())
23832385
return;
23842386

llvm/lib/Target/RISCV/RISCVRegisterInfo.h

+7
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,13 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
6161
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
6262
CallingConv::ID) const override;
6363

64+
unsigned getCSRFirstUseCost() const override {
65+
// The cost will be compared against BlockFrequency where entry has the
66+
// value of 1 << 14. A value of 5 will choose to spill or split cold
67+
// path instead of using a callee-saved register.
68+
return 5;
69+
}
70+
6471
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
6572

6673
const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const override;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv64 -regalloc-csr-first-time-cost=0 < %s | FileCheck %s -check-prefix=ZERO-COST
3+
; RUN: llc -mtriple=riscv64 < %s | FileCheck %s -check-prefix=DEFAULT-COST
4+
5+
define fastcc void @Perl_sv_setnv(i8 %c, ptr %.str.54.3682) nounwind {
6+
; ZERO-COST-LABEL: Perl_sv_setnv:
7+
; ZERO-COST: # %bb.0: # %entry
8+
; ZERO-COST-NEXT: addi sp, sp, -32
9+
; ZERO-COST-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
10+
; ZERO-COST-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
11+
; ZERO-COST-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
12+
; ZERO-COST-NEXT: andi a0, a0, 255
13+
; ZERO-COST-NEXT: li a2, 2
14+
; ZERO-COST-NEXT: blt a2, a0, .LBB0_3
15+
; ZERO-COST-NEXT: # %bb.1: # %entry
16+
; ZERO-COST-NEXT: beqz a0, .LBB0_4
17+
; ZERO-COST-NEXT: # %bb.2: # %entry
18+
; ZERO-COST-NEXT: mv s0, a1
19+
; ZERO-COST-NEXT: li a1, 1
20+
; ZERO-COST-NEXT: beq a0, a1, .LBB0_6
21+
; ZERO-COST-NEXT: j .LBB0_7
22+
; ZERO-COST-NEXT: .LBB0_3: # %entry
23+
; ZERO-COST-NEXT: li a2, 3
24+
; ZERO-COST-NEXT: bne a0, a2, .LBB0_5
25+
; ZERO-COST-NEXT: .LBB0_4: # %sw.bb3
26+
; ZERO-COST-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
27+
; ZERO-COST-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
28+
; ZERO-COST-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
29+
; ZERO-COST-NEXT: addi sp, sp, 32
30+
; ZERO-COST-NEXT: ret
31+
; ZERO-COST-NEXT: .LBB0_5: # %entry
32+
; ZERO-COST-NEXT: mv s0, a1
33+
; ZERO-COST-NEXT: li a1, 12
34+
; ZERO-COST-NEXT: bne a0, a1, .LBB0_7
35+
; ZERO-COST-NEXT: .LBB0_6: # %sw.bb34.i
36+
; ZERO-COST-NEXT: li s0, 0
37+
; ZERO-COST-NEXT: .LBB0_7: # %Perl_sv_reftype.exit
38+
; ZERO-COST-NEXT: li s1, 0
39+
; ZERO-COST-NEXT: li a0, 0
40+
; ZERO-COST-NEXT: li a1, 0
41+
; ZERO-COST-NEXT: jalr s1
42+
; ZERO-COST-NEXT: li a0, 0
43+
; ZERO-COST-NEXT: mv a1, s0
44+
; ZERO-COST-NEXT: li a2, 0
45+
; ZERO-COST-NEXT: jalr s1
46+
;
47+
; DEFAULT-COST-LABEL: Perl_sv_setnv:
48+
; DEFAULT-COST: # %bb.0: # %entry
49+
; DEFAULT-COST-NEXT: addi sp, sp, -32
50+
; DEFAULT-COST-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
51+
; DEFAULT-COST-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
52+
; DEFAULT-COST-NEXT: andi a0, a0, 255
53+
; DEFAULT-COST-NEXT: li a2, 2
54+
; DEFAULT-COST-NEXT: blt a2, a0, .LBB0_3
55+
; DEFAULT-COST-NEXT: # %bb.1: # %entry
56+
; DEFAULT-COST-NEXT: beqz a0, .LBB0_4
57+
; DEFAULT-COST-NEXT: # %bb.2: # %entry
58+
; DEFAULT-COST-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
59+
; DEFAULT-COST-NEXT: li a1, 1
60+
; DEFAULT-COST-NEXT: beq a0, a1, .LBB0_6
61+
; DEFAULT-COST-NEXT: j .LBB0_7
62+
; DEFAULT-COST-NEXT: .LBB0_3: # %entry
63+
; DEFAULT-COST-NEXT: li a2, 3
64+
; DEFAULT-COST-NEXT: bne a0, a2, .LBB0_5
65+
; DEFAULT-COST-NEXT: .LBB0_4: # %sw.bb3
66+
; DEFAULT-COST-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
67+
; DEFAULT-COST-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
68+
; DEFAULT-COST-NEXT: addi sp, sp, 32
69+
; DEFAULT-COST-NEXT: ret
70+
; DEFAULT-COST-NEXT: .LBB0_5: # %entry
71+
; DEFAULT-COST-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
72+
; DEFAULT-COST-NEXT: li a1, 12
73+
; DEFAULT-COST-NEXT: bne a0, a1, .LBB0_7
74+
; DEFAULT-COST-NEXT: .LBB0_6: # %sw.bb34.i
75+
; DEFAULT-COST-NEXT: sd zero, 8(sp) # 8-byte Folded Spill
76+
; DEFAULT-COST-NEXT: .LBB0_7: # %Perl_sv_reftype.exit
77+
; DEFAULT-COST-NEXT: li s0, 0
78+
; DEFAULT-COST-NEXT: li a0, 0
79+
; DEFAULT-COST-NEXT: li a1, 0
80+
; DEFAULT-COST-NEXT: jalr s0
81+
; DEFAULT-COST-NEXT: li a0, 0
82+
; DEFAULT-COST-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
83+
; DEFAULT-COST-NEXT: li a2, 0
84+
; DEFAULT-COST-NEXT: jalr s0
85+
entry:
86+
switch i8 %c, label %Perl_sv_reftype.exit [
87+
i8 1, label %sw.bb4
88+
i8 12, label %sw.bb34.i
89+
i8 3, label %sw.bb3
90+
i8 0, label %sw.bb3
91+
]
92+
93+
sw.bb3: ; preds = %entry, %entry
94+
ret void
95+
96+
sw.bb4: ; preds = %entry
97+
br label %Perl_sv_reftype.exit
98+
99+
sw.bb34.i: ; preds = %entry
100+
br label %Perl_sv_reftype.exit
101+
102+
Perl_sv_reftype.exit: ; preds = %sw.bb34.i, %sw.bb4, %entry
103+
%retval.0.i = phi ptr [ null, %sw.bb34.i ], [ null, %sw.bb4 ], [ %.str.54.3682, %entry ]
104+
%call17 = tail call fastcc i64 null(ptr null, i32 0)
105+
tail call void (ptr, ...) null(ptr null, ptr %retval.0.i, ptr null)
106+
unreachable
107+
}

0 commit comments

Comments
 (0)