Skip to content

Commit 4946dbe

Browse files
[RISCV][RegAlloc] Add getCSRFirstUseCost for RISC-V
This is based off of 63efd8e. The following table shows the percent change in the dynamic instruction count when the function in this patch returns 0 (default) versus other values.

| benchmark | % speedup 1 over 0 | % speedup 4 over 0 | % speedup 16 over 0 | % speedup 64 over 0 | % speedup 128 over 0 |
| --------------- | ---------------------- | --------------------- | --------------------- | -------------------- | -------------------- |
| 500.perlbench_r | 0.001018570165 | 0.001049508358 | 0.001001106529 | 0.03382582818 | 0.03395354577 |
| 502.gcc_r | 0.02850551412 | 0.02170512371 | 0.01453021263 | 0.06011008637 | 0.1215691521 |
| 505.mcf_r | -0.00009506373338 | -0.00009090057642 | -0.0000860991497 | -0.00005027849766 | 0.00001251173791 |
| 520.omnetpp_r | 0.2958940288 | 0.2959715925 | 0.2961141505 | 0.2959823497 | 0.2963124341 |
| 523.xalancbmk_r | -0.0327074721 | -0.01037021046 | -0.3226810542 | 0.02127133714 | 0.02765388389 |
| 525.x264_r | 0.0000001381714403 | -0.00000007041540345 | -0.00000002156399465 | 0.0000002108993364 | 0.0000002463382874 |
| 531.deepsjeng_r | 0.00000000339777238 | 0.000000003874652714 | 0.000000003636212547 | 0.000000003874652714 | 0.000000003159332213 |
| 541.leela_r | 0.0009186059953 | -0.000424159199 | 0.0004984456879 | 0.274948447 | 0.8135521414 |
| 557.xz_r | -0.000000003547118854 | -0.00004896449559 | -0.00004910691576 | -0.0000491109983 | -0.00004895599589 |
| geomean | 0.03265937388 | 0.03424232324 | -0.00107917442 | 0.07629116165 | 0.1439913192 |

The following table shows the percent change in the runtime when the function in this patch returns 0 (default) versus other values.
| benchmark | % speedup 1 over 0 | % speedup 4 over 0 | % speedup 16 over 0 | % speedup 64 over 0 | % speedup 128 over 0 |
| --------------- | ------------------ | ------------------ | ------------------- | ------------------- | ------------------- |
| 500.perlbench_r | 0.1722356761 | 0.2269681109 | 0.2596825578 | 0.361573851 | 1.15041305 |
| 502.gcc_r | -0.548415855 | -0.06187002799 | -0.5553684674 | -0.8876686237 | -0.4668665535 |
| 505.mcf_r | -0.8786414258 | -0.4150938441 | -1.035517726 | -0.1860770377 | -0.01904825648 |
| 520.omnetpp_r | 0.4130256072 | 0.6595976188 | 0.897332171 | 0.6252625622 | 0.3869467278 |
| 523.xalancbmk_r | 1.318132014 | -0.003927574 | 1.025962975 | 1.090320253 | -0.789206202 |
| 525.x264_r | -0.03112871796 | -0.00167557587 | 0.06932423155 | -0.1919840015 | -0.1203585732 |
| 531.deepsjeng_r | -0.259516072 | -0.01973455652 | -0.2723227894 | -0.005417022257 | -0.02222388177 |
| 541.leela_r | -0.3497178495 | -0.3510447393 | 0.1274508001 | 0.6485542452 | 0.2880651727 |
| 557.xz_r | 0.7683565263 | -0.2197509447 | -0.0431183874 | 0.07518130872 | 0.5236853039 |
| geomean | 0.06506952742 | -0.0211865386 | 0.05072694648 | 0.1684530637 | 0.1020533557 |

I chose to set the value to 64 on RISC-V because it improves both the dynamic instruction count (IC) and the runtime, and because AMDGPU set their number to 100 and callee-saved spills are probably less expensive on RISC-V than on AMDGPU. I looked at some diffs, and it seems like this patch leads to two things:

1. Less spilling -- not spilling the CSR led to better register allocation and helped us avoid spills down the line.
2. Avoiding the CSR spill, but spilling more on paths that static heuristics estimate as cold.
1 parent f4d599c commit 4946dbe

File tree

2 files changed

+112
-0
lines changed

2 files changed

+112
-0
lines changed

llvm/lib/Target/RISCV/RISCVRegisterInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,13 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
6161
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
6262
CallingConv::ID) const override;
6363

64+
unsigned getCSRFirstUseCost() const override {
65+
// Cost charged for the first use of a callee-saved register (CSR). The cost is compared against BlockFrequency, where entry has the
66+
// value of 1 << 14. A value of 64 makes the register allocator choose to spill or split on a cold
67+
// path instead of using a callee-saved register. Per the commit message, 64 was picked because it improved both dynamic instruction count and runtime.
68+
return 64;
69+
}
70+
6471
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
6572

6673
const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const override;
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc %s -mtriple=riscv64 -regalloc-csr-first-time-cost=0 | FileCheck %s -check-prefix=ZERO-COST
3+
; RUN: llc %s -mtriple=riscv64 -regalloc-csr-first-time-cost=64 | FileCheck %s -check-prefix=SOME-COST
4+
5+
define fastcc void @Perl_sv_setnv(ptr %.str.54.3682) {
6+
; ZERO-COST-LABEL: Perl_sv_setnv:
7+
; ZERO-COST: # %bb.0: # %entry
8+
; ZERO-COST-NEXT: addi sp, sp, -32
9+
; ZERO-COST-NEXT: .cfi_def_cfa_offset 32
10+
; ZERO-COST-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
11+
; ZERO-COST-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
12+
; ZERO-COST-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
13+
; ZERO-COST-NEXT: .cfi_offset ra, -8
14+
; ZERO-COST-NEXT: .cfi_offset s0, -16
15+
; ZERO-COST-NEXT: .cfi_offset s1, -24
16+
; ZERO-COST-NEXT: bnez zero, .LBB0_5
17+
; ZERO-COST-NEXT: # %bb.1: # %entry
18+
; ZERO-COST-NEXT: li a1, 1
19+
; ZERO-COST-NEXT: bnez a1, .LBB0_6
20+
; ZERO-COST-NEXT: .LBB0_2: # %entry
21+
; ZERO-COST-NEXT: mv s0, a0
22+
; ZERO-COST-NEXT: beqz zero, .LBB0_4
23+
; ZERO-COST-NEXT: # %bb.3: # %sw.bb34.i
24+
; ZERO-COST-NEXT: li s0, 0
25+
; ZERO-COST-NEXT: .LBB0_4: # %Perl_sv_reftype.exit
26+
; ZERO-COST-NEXT: li s1, 0
27+
; ZERO-COST-NEXT: li a0, 0
28+
; ZERO-COST-NEXT: li a1, 0
29+
; ZERO-COST-NEXT: jalr s1
30+
; ZERO-COST-NEXT: li a0, 0
31+
; ZERO-COST-NEXT: mv a1, s0
32+
; ZERO-COST-NEXT: li a2, 0
33+
; ZERO-COST-NEXT: jalr s1
34+
; ZERO-COST-NEXT: .LBB0_5: # %entry
35+
; ZERO-COST-NEXT: beqz zero, .LBB0_2
36+
; ZERO-COST-NEXT: .LBB0_6: # %sw.bb3
37+
; ZERO-COST-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
38+
; ZERO-COST-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
39+
; ZERO-COST-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
40+
; ZERO-COST-NEXT: .cfi_restore ra
41+
; ZERO-COST-NEXT: .cfi_restore s0
42+
; ZERO-COST-NEXT: .cfi_restore s1
43+
; ZERO-COST-NEXT: addi sp, sp, 32
44+
; ZERO-COST-NEXT: .cfi_def_cfa_offset 0
45+
; ZERO-COST-NEXT: ret
46+
;
47+
; SOME-COST-LABEL: Perl_sv_setnv:
48+
; SOME-COST: # %bb.0: # %entry
49+
; SOME-COST-NEXT: addi sp, sp, -32
50+
; SOME-COST-NEXT: .cfi_def_cfa_offset 32
51+
; SOME-COST-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
52+
; SOME-COST-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
53+
; SOME-COST-NEXT: .cfi_offset ra, -8
54+
; SOME-COST-NEXT: .cfi_offset s0, -16
55+
; SOME-COST-NEXT: bnez zero, .LBB0_5
56+
; SOME-COST-NEXT: # %bb.1: # %entry
57+
; SOME-COST-NEXT: li a1, 1
58+
; SOME-COST-NEXT: bnez a1, .LBB0_6
59+
; SOME-COST-NEXT: .LBB0_2: # %entry
60+
; SOME-COST-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
61+
; SOME-COST-NEXT: beqz zero, .LBB0_4
62+
; SOME-COST-NEXT: # %bb.3: # %sw.bb34.i
63+
; SOME-COST-NEXT: sd zero, 8(sp) # 8-byte Folded Spill
64+
; SOME-COST-NEXT: .LBB0_4: # %Perl_sv_reftype.exit
65+
; SOME-COST-NEXT: li s0, 0
66+
; SOME-COST-NEXT: li a0, 0
67+
; SOME-COST-NEXT: li a1, 0
68+
; SOME-COST-NEXT: jalr s0
69+
; SOME-COST-NEXT: li a0, 0
70+
; SOME-COST-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
71+
; SOME-COST-NEXT: li a2, 0
72+
; SOME-COST-NEXT: jalr s0
73+
; SOME-COST-NEXT: .LBB0_5: # %entry
74+
; SOME-COST-NEXT: beqz zero, .LBB0_2
75+
; SOME-COST-NEXT: .LBB0_6: # %sw.bb3
76+
; SOME-COST-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
77+
; SOME-COST-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
78+
; SOME-COST-NEXT: .cfi_restore ra
79+
; SOME-COST-NEXT: .cfi_restore s0
80+
; SOME-COST-NEXT: addi sp, sp, 32
81+
; SOME-COST-NEXT: .cfi_def_cfa_offset 0
82+
; SOME-COST-NEXT: ret
83+
entry:
84+
switch i8 0, label %Perl_sv_reftype.exit [
85+
i8 1, label %sw.bb4
86+
i8 12, label %sw.bb34.i
87+
i8 3, label %sw.bb3
88+
i8 0, label %sw.bb3
89+
]
90+
91+
sw.bb3: ; preds = %entry, %entry
92+
ret void
93+
94+
sw.bb4: ; preds = %entry
95+
br label %Perl_sv_reftype.exit
96+
97+
sw.bb34.i: ; preds = %entry
98+
br label %Perl_sv_reftype.exit
99+
100+
Perl_sv_reftype.exit: ; preds = %sw.bb34.i, %sw.bb4, %entry
101+
%retval.0.i = phi ptr [ null, %sw.bb34.i ], [ null, %sw.bb4 ], [ %.str.54.3682, %entry ]
102+
%call17 = tail call fastcc i64 null(ptr null, i32 0)
103+
tail call void (ptr, ...) null(ptr null, ptr %retval.0.i, ptr null)
104+
unreachable
105+
}

0 commit comments

Comments
 (0)