Skip to content

Commit f6c7bae

Browse files
authored
[Win/x64] Update preserve_most to treat XMM registers like C (#73866)
As [scottmcm described](https://discourse.llvm.org/t/conv-c-and-conv-preservemost-mix-badly-on-windows-x64/73054), the `preserve_most` calling convention, as currently implemented, is a bad fit for Windows on x64. The intent of `preserve_most` is "to make the code in the caller as unintrusive as possible", but `preserve_most` causes the caller to spill and restore ten SIMD registers. It would be preferable to make `preserve_most` treat the XMM registers however the C calling convention does on the target operating system. This is a breaking change, but the documentation indicates that `preserve_most` is still experimental, so I believe that ABI compatibility is not yet a requirement.
1 parent c43c86c commit f6c7bae

File tree

4 files changed

+100
-4
lines changed

4 files changed

+100
-4
lines changed

llvm/docs/LangRef.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,9 @@ added in the future:
365365

366366
- On X86-64 the callee preserves all general purpose registers, except for
367367
R11 and return registers, if any. R11 can be used as a scratch register.
368-
Floating-point registers (XMMs/YMMs) are not preserved and need to be
369-
saved by the caller.
368+
The treatment of floating-point registers (XMMs/YMMs) matches the OS's C
369+
calling convention: on most platforms, they are not preserved and need to
370+
be saved by the caller, but on Windows, xmm6-xmm15 are preserved.
370371

371372
- On AArch64 the callee preserves all general purpose registers, except X0-X8
372373
and X16-X18.

llvm/lib/Target/X86/X86CallingConv.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,9 @@ def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)
11511151
def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
11521152
R8, R9, R10)>;
11531153

1154+
def CSR_Win64_RT_MostRegs : CalleeSavedRegs<(add CSR_64_RT_MostRegs,
1155+
(sequence "XMM%u", 6, 15))>;
1156+
11541157
// All registers - except r11 and return registers.
11551158
def CSR_64_RT_AllRegs : CalleeSavedRegs<(add CSR_64_RT_MostRegs,
11561159
(sequence "XMM%u", 0, 15))>;

llvm/lib/Target/X86/X86RegisterInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
310310
return CSR_64_AllRegs_AVX_SaveList;
311311
return CSR_64_AllRegs_SaveList;
312312
case CallingConv::PreserveMost:
313-
return CSR_64_RT_MostRegs_SaveList;
313+
return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
314+
: CSR_64_RT_MostRegs_SaveList;
314315
case CallingConv::PreserveAll:
315316
if (HasAVX)
316317
return CSR_64_RT_AllRegs_AVX_SaveList;
@@ -431,7 +432,7 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
431432
return CSR_64_AllRegs_AVX_RegMask;
432433
return CSR_64_AllRegs_RegMask;
433434
case CallingConv::PreserveMost:
434-
return CSR_64_RT_MostRegs_RegMask;
435+
return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
435436
case CallingConv::PreserveAll:
436437
if (HasAVX)
437438
return CSR_64_RT_AllRegs_AVX_RegMask;
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
; RUN: sed -e "s/RETTYPE/void/;s/RETVAL//" %s | llc -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck --check-prefixes=ALL,VOID %s
2+
; RUN: sed -e "s/RETTYPE/i32/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck --check-prefixes=ALL,INT %s
3+
; RUN: sed -e "s/RETTYPE/\{i64\,i64\}/;s/RETVAL/undef/" %s | llc -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck --check-prefixes=ALL,INT128 %s
4+
5+
; Every GPR should be saved, except r11 and return registers.
6+
; XMM registers 6-15 should also be saved.
7+
define preserve_mostcc RETTYPE @preserve_mostcc1(i64, i64, double, double) nounwind {
8+
entry:
9+
;ALL-LABEL: preserve_mostcc1
10+
;ALL: pushq %r10
11+
;ALL-NEXT: pushq %r9
12+
;ALL-NEXT: pushq %r8
13+
;ALL-NEXT: pushq %rdi
14+
;ALL-NEXT: pushq %rsi
15+
;VOID-NEXT: pushq %rdx
16+
;INT-NEXT: pushq %rdx
17+
;INT128-NOT: pushq %rdx
18+
;ALL-NEXT: pushq %rcx
19+
;VOID-NEXT: pushq %rax
20+
;INT-NOT: pushq %rax
21+
;INT128-NOT: pushq %rax
22+
;ALL-NEXT: pushq %rbp
23+
;ALL-NEXT: pushq %r15
24+
;ALL-NEXT: pushq %r14
25+
;ALL-NEXT: pushq %r13
26+
;ALL-NEXT: pushq %r12
27+
;ALL-NEXT: pushq %rbx
28+
;ALL: movaps %xmm15
29+
;ALL-NEXT: movaps %xmm14
30+
;ALL-NEXT: movaps %xmm13
31+
;ALL-NEXT: movaps %xmm12
32+
;ALL-NEXT: movaps %xmm11
33+
;ALL-NEXT: movaps %xmm10
34+
;ALL-NEXT: movaps %xmm9
35+
;ALL-NEXT: movaps %xmm8
36+
;ALL-NEXT: movaps %xmm7
37+
;ALL-NEXT: movaps %xmm6
38+
;ALL-NOT: movaps %xmm5
39+
;ALL-NOT: movaps %xmm4
40+
;ALL-NOT: movaps %xmm3
41+
;ALL-NOT: movaps %xmm2
42+
;ALL-NOT: movaps %xmm1
43+
;ALL-NOT: movaps %xmm0
44+
;ALL-NOT: movaps {{.*}} %xmm0
45+
;ALL-NOT: movaps {{.*}} %xmm1
46+
;ALL-NOT: movaps {{.*}} %xmm2
47+
;ALL-NOT: movaps {{.*}} %xmm3
48+
;ALL-NOT: movaps {{.*}} %xmm4
49+
;ALL-NOT: movaps {{.*}} %xmm5
50+
;ALL: movaps {{.*}} %xmm6
51+
;ALL-NEXT: movaps {{.*}} %xmm7
52+
;ALL-NEXT: movaps {{.*}} %xmm8
53+
;ALL-NEXT: movaps {{.*}} %xmm9
54+
;ALL-NEXT: movaps {{.*}} %xmm10
55+
;ALL-NEXT: movaps {{.*}} %xmm11
56+
;ALL-NEXT: movaps {{.*}} %xmm12
57+
;ALL-NEXT: movaps {{.*}} %xmm13
58+
;ALL-NEXT: movaps {{.*}} %xmm14
59+
;ALL-NEXT: movaps {{.*}} %xmm15
60+
;ALL: popq %rbx
61+
;ALL-NEXT: popq %r12
62+
;ALL-NEXT: popq %r13
63+
;ALL-NEXT: popq %r14
64+
;ALL-NEXT: popq %r15
65+
;ALL-NEXT: popq %rbp
66+
;VOID-NEXT: popq %rax
67+
;INT-NOT: popq %rax
68+
;INT128-NOT: popq %rax
69+
;ALL-NEXT: popq %rcx
70+
;VOID-NEXT: popq %rdx
71+
;INT-NEXT: popq %rdx
72+
;INT128-NOT: popq %rdx
73+
;ALL-NEXT: popq %rsi
74+
;ALL-NEXT: popq %rdi
75+
;ALL-NEXT: popq %r8
76+
;ALL-NEXT: popq %r9
77+
;ALL-NEXT: popq %r10
78+
call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{rbp},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15}"()
79+
ret RETTYPE RETVAL
80+
}
81+
82+
; Make sure XMMs are not saved before the call
83+
declare preserve_mostcc RETTYPE @foo(i64, i64, double, double)
84+
define void @preserve_mostcc2() nounwind {
85+
entry:
86+
;ALL-LABEL: preserve_mostcc2
87+
;ALL-NOT: movaps
88+
;ALL-NOT: {{.*xmm[0-1,4-9].*}}
89+
call preserve_mostcc RETTYPE @foo(i64 1, i64 2, double 3.0, double 4.0)
90+
ret void
91+
}

0 commit comments

Comments
 (0)