Skip to content

Commit 3a39e95

Browse files
committed
Set the stack pointer to zero once the context function returns
1 parent 88ad46a commit 3a39e95

File tree

7 files changed

+459
-180
lines changed

7 files changed

+459
-180
lines changed

src/arch/aarch64.rs

Lines changed: 97 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,12 @@
4747
// from the stack frame at x29 (in the parent stack), thus continuing
4848
// unwinding at the swap call site instead of falling off the end of context stack.
4949
use core::mem;
50-
use stack::Stack;
50+
use stack;
51+
use arch::StackPointer;
5152

5253
pub const STACK_ALIGNMENT: usize = 16;
5354

54-
#[derive(Debug, Clone, Copy)]
55-
pub struct StackPointer(*mut usize);
56-
57-
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer {
55+
pub unsafe fn init<Stack: stack::Stack>(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer)) -> StackPointer {
5856
#[cfg(not(target_vendor = "apple"))]
5957
#[naked]
6058
unsafe extern "C" fn trampoline_1() {
@@ -129,15 +127,29 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -
129127
# Call the provided function.
130128
ldr x2, [sp, #16]
131129
blr x2
130+
131+
# Clear the stack pointer. We can't call into this context any more once
132+
# the function has returned.
133+
mov x1, #0
134+
135+
# Restore the stack pointer of the parent context. No CFI adjustments
136+
# are needed since we have the same stack frame as trampoline_1.
137+
ldr x2, [sp]
138+
mov sp, x2
139+
140+
# Load frame and instruction pointers of the parent context.
141+
ldp x29, x30, [sp], #16
142+
.cfi_adjust_cfa_offset -16
143+
.cfi_restore x29
144+
.cfi_restore x30
145+
146+
# Return into the parent context. Use `br` instead of a `ret` to avoid
147+
# return address mispredictions.
148+
br x30
132149
"#
133150
: : : : "volatile")
134151
}
135152

136-
unsafe fn push(sp: &mut StackPointer, val: usize) {
137-
sp.0 = sp.0.offset(-1);
138-
*sp.0 = val
139-
}
140-
141153
// We set up the stack in a somewhat special way so that to the unwinder it
142154
// looks like trampoline_1 has called trampoline_2, which has in turn called
143155
// swap::trampoline.
@@ -146,36 +158,30 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -
146158
// followed by the x29 value for that frame. This setup supports unwinding
147159
// using DWARF CFI as well as the frame pointer-based unwinding used by tools
148160
// such as perf or dtrace.
149-
let mut sp = StackPointer(stack.base() as *mut usize);
161+
let mut sp = StackPointer::stack_base(stack);
150162

151-
push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned
152-
push(&mut sp, f as usize); // Function that trampoline_2 should call
163+
sp.push(0 as usize); // Padding to ensure the stack is properly aligned
164+
sp.push(f as usize); // Function that trampoline_2 should call
153165

154166
// Call frame for trampoline_2. The CFA slot is updated by swap::trampoline
155167
// each time a context switch is performed.
156-
push(&mut sp, trampoline_1 as usize + 4); // Return after the nop
157-
push(&mut sp, 0xdeaddeaddead0cfa); // CFA slot
168+
sp.push(trampoline_1 as usize + 4); // Return after the nop
169+
sp.push(0xdeaddeaddead0cfa); // CFA slot
158170

159171
// Call frame for swap::trampoline. We set up the x29 value to point to the
160172
// parent call frame.
161-
let frame = sp;
162-
push(&mut sp, trampoline_2 as usize + 4); // Entry point, skip initial nop
163-
push(&mut sp, frame.0 as usize); // Pointer to parent call frame
173+
let frame = sp.offset(0);
174+
sp.push(trampoline_2 as usize + 4); // Entry point, skip initial nop
175+
sp.push(frame as usize); // Pointer to parent call frame
164176

165177
sp
166178
}
167179

168180
#[inline(always)]
169-
pub unsafe fn swap(arg: usize, new_sp: StackPointer,
170-
new_stack: Option<&Stack>) -> (usize, StackPointer) {
181+
pub unsafe fn swap_link<Stack: stack::Stack>(arg: usize, new_sp: StackPointer,
182+
new_stack: &Stack) -> (usize, Option<StackPointer>) {
171183
// Address of the topmost CFA stack slot.
172-
let mut dummy: usize = mem::uninitialized();
173-
let new_cfa = if let Some(new_stack) = new_stack {
174-
(new_stack.base() as *mut usize).offset(-4)
175-
} else {
176-
// Just pass a dummy pointer if we aren't linking the stack
177-
&mut dummy
178-
};
184+
let new_cfa = StackPointer::stack_base(new_stack).offset(-4);
179185

180186
#[naked]
181187
unsafe extern "C" fn trampoline() {
@@ -213,7 +219,7 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
213219
}
214220

215221
let ret: usize;
216-
let ret_sp: *mut usize;
222+
let ret_sp: usize;
217223
asm!(
218224
r#"
219225
# Call the trampoline to switch to the new context.
@@ -240,5 +246,67 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
240246
// the "alignstack" LLVM inline assembly option does exactly the same
241247
// thing on AArch64.
242248
: "volatile", "alignstack");
243-
(ret, StackPointer(ret_sp))
249+
(ret, mem::transmute(ret_sp))
250+
}
251+
252+
/// Switches execution to the context whose saved stack pointer is `new_sp`.
///
/// `arg` is passed to the target context in `x0` and `new_sp` in `x2`. The
/// inner `trampoline` saves `x29`/`x30` on the current stack, hands the current
/// stack pointer to the target in `x1`, installs `new_sp` as the stack pointer
/// and branches to the return address popped from the new stack.
///
/// Returns the `x0` and `x1` values observed when this context is resumed, the
/// latter reinterpreted as a `StackPointer`.
///
/// Unlike `swap_link`, this takes no stack argument and computes no CFA slot.
#[inline(always)]
253+
pub unsafe fn swap(arg: usize, new_sp: StackPointer) -> (usize, StackPointer) {
254+
#[naked]
255+
unsafe extern "C" fn trampoline() {
256+
asm!(
257+
r#"
258+
# Save the frame pointer and link register; the unwinder uses them to find
259+
# the CFA of the caller, and so they have to have the correct value immediately
260+
# after the call instruction that invoked the trampoline.
261+
stp x29, x30, [sp, #-16]!
262+
.cfi_adjust_cfa_offset 16
263+
.cfi_rel_offset x30, 8
264+
.cfi_rel_offset x29, 0
265+
266+
# Pass the stack pointer of the old context to the new one.
267+
mov x1, sp
268+
# Load stack pointer of the new context.
269+
mov sp, x2
270+
271+
# Load frame and instruction pointers of the new context.
272+
ldp x29, x30, [sp], #16
273+
.cfi_adjust_cfa_offset -16
274+
.cfi_restore x29
275+
.cfi_restore x30
276+
277+
# Return into the new context. Use `br` instead of a `ret` to avoid
278+
# return address mispredictions.
279+
br x30
280+
"#
281+
: : : : "volatile")
282+
}
283+
284+
// Values delivered in x0 / x1 when something switches back to this context.
let ret: usize;
285+
let ret_sp: usize;
286+
asm!(
287+
r#"
288+
# Call the trampoline to switch to the new context.
289+
bl ${2}
290+
"#
291+
: "={x0}" (ret)
292+
"={x1}" (ret_sp)
293+
: "s" (trampoline as usize)
294+
"{x0}" (arg)
295+
"{x2}" (new_sp.0)
296+
:/*x0, "x1",*/"x2", "x3", "x4", "x5", "x6", "x7",
297+
"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
298+
"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
299+
"x24", "x25", "x26", "x27", "x28",/*fp,*/ "lr", /*sp,*/
300+
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
301+
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
302+
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
303+
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
304+
"cc", "memory"
305+
// Ideally, we would set the LLVM "noredzone" attribute on this function
306+
// (and it would be propagated to the call site). Unfortunately, rustc
307+
// provides no such functionality. Fortunately, by a lucky coincidence,
308+
// the "alignstack" LLVM inline assembly option does exactly the same
309+
// thing on AArch64.
310+
: "volatile", "alignstack");
311+
// NOTE(review): `ret_sp` is transmuted into the `NonZero`-backed
// `StackPointer` without a zero check. `trampoline_2` in `init` writes 0 to
// x1 once the context function returns; presumably that value can only be
// observed through `swap_link` (which returns an `Option`) — confirm.
(ret, mem::transmute(ret_sp))
244312
}

src/arch/mod.rs

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
// copied, modified, or distributed except according to those terms.
88

99
pub use self::imp::*;
10+
use core::nonzero::NonZero;
11+
use stack;
1012

1113
#[allow(unused_attributes)] // rust-lang/rust#35584
1214
#[cfg_attr(target_arch = "x86", path = "x86.rs")]
@@ -15,6 +17,27 @@ pub use self::imp::*;
1517
#[cfg_attr(target_arch = "or1k", path = "or1k.rs")]
1618
mod imp;
1719

20+
/// A pointer into a context's stack, stored as a `NonZero` raw pointer.
///
/// The arch-level `swap_link` returns `Option<StackPointer>`, and the `ret`
/// test in this module expects `None` once the context function has returned.
// NOTE(review): presumably the `NonZero` wrapper is what allows a zeroed stack
// pointer to be read back as `None` — confirm against the arch implementations.
#[derive(Debug, Clone, Copy)]
21+
pub struct StackPointer(NonZero<*mut usize>);
22+
23+
impl StackPointer {
24+
/// Pushes `val` onto the stack: moves the pointer down by one `usize` slot
/// and writes `val` into the slot it now points at.
///
/// # Safety
///
/// The write goes through a raw pointer, so the slot below the current
/// pointer must be valid, writable memory.
#[inline(always)]
25+
pub unsafe fn push(&mut self, val: usize) {
26+
// Step down first, then store at the new location.
self.0 = NonZero::new(self.0.offset(-1));
27+
**self.0 = val;
28+
}
29+
30+
/// Creates a `StackPointer` from the address returned by `stack.base()`.
///
/// # Safety
///
/// The address is wrapped in `NonZero` without a check here.
// NOTE(review): assumes `stack.base()` never returns null — confirm against
// the `stack::Stack` trait contract.
#[inline(always)]
31+
pub unsafe fn stack_base<Stack: stack::Stack>(stack: &Stack) -> StackPointer {
32+
StackPointer(NonZero::new(stack.base() as *mut usize))
33+
}
34+
35+
/// Returns the raw pointer located `count` `usize` elements away from this
/// stack pointer. `self` is not modified.
///
/// # Safety
///
/// Forwards to the raw pointer `offset` method, so the caller must uphold
/// the requirements of that method.
#[inline(always)]
36+
pub unsafe fn offset(&self, count: isize) -> *mut usize {
37+
self.0.offset(count)
38+
}
39+
}
40+
1841
#[cfg(test)]
1942
mod tests {
2043
extern crate test;
@@ -25,55 +48,55 @@ mod tests {
2548

2649
#[test]
2750
fn context() {
28-
unsafe extern "C" fn adder(arg: usize, stack_ptr: StackPointer) -> ! {
51+
unsafe extern "C" fn adder(arg: usize, stack_ptr: StackPointer) {
2952
println!("it's alive! arg: {}", arg);
30-
let (arg, stack_ptr) = arch::swap(arg + 1, stack_ptr, None);
53+
let (arg, stack_ptr) = arch::swap(arg + 1, stack_ptr);
3154
println!("still alive! arg: {}", arg);
32-
arch::swap(arg + 1, stack_ptr, None);
55+
arch::swap(arg + 1, stack_ptr);
3356
panic!("i should be dead");
3457
}
3558

3659
unsafe {
3760
let stack = OsStack::new(4 << 20).unwrap();
3861
let stack_ptr = arch::init(&stack, adder);
3962

40-
let (ret, stack_ptr) = arch::swap(10, stack_ptr, Some(&stack));
63+
let (ret, stack_ptr) = arch::swap_link(10, stack_ptr, &stack);
4164
assert_eq!(ret, 11);
42-
let (ret, _) = arch::swap(50, stack_ptr, Some(&stack));
65+
let (ret, _) = arch::swap_link(50, stack_ptr.unwrap(), &stack);
4366
assert_eq!(ret, 51);
4467
}
4568
}
4669

4770
#[test]
4871
fn context_simd() {
49-
unsafe extern "C" fn permuter(arg: usize, stack_ptr: StackPointer) -> ! {
72+
unsafe extern "C" fn permuter(arg: usize, stack_ptr: StackPointer) {
5073
// This will crash if the stack is not aligned properly.
5174
let x = simd::i32x4::splat(arg as i32);
5275
let y = x * x;
5376
println!("simd result: {:?}", y);
54-
let (_, stack_ptr) = arch::swap(0, stack_ptr, None);
77+
let (_, stack_ptr) = arch::swap(0, stack_ptr);
5578
// And try again after a context switch.
5679
let x = simd::i32x4::splat(arg as i32);
5780
let y = x * x;
5881
println!("simd result: {:?}", y);
59-
arch::swap(0, stack_ptr, None);
82+
arch::swap(0, stack_ptr);
6083
panic!("i should be dead");
6184
}
6285

6386
unsafe {
6487
let stack = OsStack::new(4 << 20).unwrap();
6588
let stack_ptr = arch::init(&stack, permuter);
6689

67-
let (_, stack_ptr) = arch::swap(10, stack_ptr, Some(&stack));
68-
arch::swap(20, stack_ptr, Some(&stack));
90+
let (_, stack_ptr) = arch::swap_link(10, stack_ptr, &stack);
91+
arch::swap_link(20, stack_ptr.unwrap(), &stack);
6992
}
7093
}
7194

72-
unsafe extern "C" fn do_panic(arg: usize, stack_ptr: StackPointer) -> ! {
95+
unsafe extern "C" fn do_panic(arg: usize, stack_ptr: StackPointer) {
7396
match arg {
7497
0 => panic!("arg=0"),
7598
1 => {
76-
arch::swap(0, stack_ptr, None);
99+
arch::swap(0, stack_ptr);
77100
panic!("arg=1");
78101
}
79102
_ => unreachable!()
@@ -87,7 +110,7 @@ mod tests {
87110
let stack = OsStack::new(4 << 20).unwrap();
88111
let stack_ptr = arch::init(&stack, do_panic);
89112

90-
arch::swap(0, stack_ptr, Some(&stack));
113+
arch::swap_link(0, stack_ptr, &stack);
91114
}
92115
}
93116

@@ -98,18 +121,31 @@ mod tests {
98121
let stack = OsStack::new(4 << 20).unwrap();
99122
let stack_ptr = arch::init(&stack, do_panic);
100123

101-
let (_, stack_ptr) = arch::swap(1, stack_ptr, Some(&stack));
102-
arch::swap(0, stack_ptr, Some(&stack));
124+
let (_, stack_ptr) = arch::swap_link(1, stack_ptr, &stack);
125+
arch::swap_link(0, stack_ptr.unwrap(), &stack);
126+
}
127+
}
128+
129+
// Checks that switching into a context whose function returns immediately
// hands back no stack pointer (`None`) to the caller of `swap_link`.
#[test]
130+
fn ret() {
131+
// Context function that does nothing and returns straight away.
unsafe extern "C" fn ret2(_: usize, _: StackPointer) {}
132+
133+
unsafe {
134+
let stack = OsStack::new(4 << 20).unwrap();
135+
let stack_ptr = arch::init(&stack, ret2);
136+
137+
let (_, stack_ptr) = arch::swap_link(0, stack_ptr, &stack);
138+
// The context has finished, so it must not be resumable.
assert!(stack_ptr.is_none());
103139
}
104140
}
105141

106142
#[bench]
107143
fn swap(b: &mut test::Bencher) {
108-
unsafe extern "C" fn loopback(mut arg: usize, mut stack_ptr: StackPointer) -> ! {
144+
unsafe extern "C" fn loopback(mut arg: usize, mut stack_ptr: StackPointer) {
109145
// This deliberately does not ignore arg, to measure the time it takes
110146
// to move the return value between registers.
111147
loop {
112-
let data = arch::swap(arg, stack_ptr, None);
148+
let data = arch::swap(arg, stack_ptr);
113149
arg = data.0;
114150
stack_ptr = data.1;
115151
}
@@ -120,7 +156,7 @@ mod tests {
120156
let mut stack_ptr = arch::init(&stack, loopback);
121157

122158
b.iter(|| for _ in 0..10 {
123-
stack_ptr = arch::swap(0, stack_ptr, Some(&stack)).1;
159+
stack_ptr = arch::swap_link(0, stack_ptr, &stack).1.unwrap();
124160
});
125161
}
126162
}

0 commit comments

Comments
 (0)