// from the stack frame at x29 (in the parent stack), thus continuing
// unwinding at the swap call site instead of falling off the end of context stack.
4949use core:: mem;
50- use stack:: Stack ;
50+ use stack;
51+ use arch:: StackPointer ;
5152
5253pub const STACK_ALIGNMENT : usize = 16 ;
5354
54- #[ derive( Debug , Clone , Copy ) ]
55- pub struct StackPointer ( * mut usize ) ;
56-
57- pub unsafe fn init ( stack : & Stack , f : unsafe extern "C" fn ( usize , StackPointer ) -> !) -> StackPointer {
55+ pub unsafe fn init < Stack : stack:: Stack > ( stack : & Stack , f : unsafe extern "C" fn ( usize , StackPointer ) ) -> StackPointer {
5856 #[ cfg( not( target_vendor = "apple" ) ) ]
5957 #[ naked]
6058 unsafe extern "C" fn trampoline_1 ( ) {
@@ -129,15 +127,29 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -
129127 # Call the provided function.
130128 ldr x2, [sp, #16]
131129 blr x2
130+
131+ # Clear the stack pointer. We can't call into this context any more once
132+ # the function has returned.
133+ mov x1, #0
134+
135+ # Restore the stack pointer of the parent context. No CFI adjustments
136+ # are needed since we have the same stack frame as trampoline_1.
137+ ldr x2, [sp]
138+ mov sp, x2
139+
140+ # Load frame and instruction pointers of the parent context.
141+ ldp x29, x30, [sp], #16
142+ .cfi_adjust_cfa_offset -16
143+ .cfi_restore x29
144+ .cfi_restore x30
145+
146+ # Return into the parent context. Use `br` instead of a `ret` to avoid
147+ # return address mispredictions.
148+ br x30
132149 "#
133150 : : : : "volatile" )
134151 }
135152
136- unsafe fn push ( sp : & mut StackPointer , val : usize ) {
137- sp. 0 = sp. 0 . offset ( -1 ) ;
138- * sp. 0 = val
139- }
140-
141153 // We set up the stack in a somewhat special way so that to the unwinder it
142154 // looks like trampoline_1 has called trampoline_2, which has in turn called
143155 // swap::trampoline.
@@ -146,36 +158,30 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -
146158 // followed by the x29 value for that frame. This setup supports unwinding
147159 // using DWARF CFI as well as the frame pointer-based unwinding used by tools
148160 // such as perf or dtrace.
149- let mut sp = StackPointer ( stack. base ( ) as * mut usize ) ;
161+ let mut sp = StackPointer :: stack_base ( stack) ;
150162
151- push ( & mut sp , 0 as usize ) ; // Padding to ensure the stack is properly aligned
152- push ( & mut sp , f as usize ) ; // Function that trampoline_2 should call
163+ sp . push ( 0 as usize ) ; // Padding to ensure the stack is properly aligned
164+ sp . push ( f as usize ) ; // Function that trampoline_2 should call
153165
154166 // Call frame for trampoline_2. The CFA slot is updated by swap::trampoline
155167 // each time a context switch is performed.
156- push ( & mut sp , trampoline_1 as usize + 4 ) ; // Return after the nop
157- push ( & mut sp , 0xdeaddeaddead0cfa ) ; // CFA slot
168+ sp . push ( trampoline_1 as usize + 4 ) ; // Return after the nop
169+ sp . push ( 0xdeaddeaddead0cfa ) ; // CFA slot
158170
159171 // Call frame for swap::trampoline. We set up the x29 value to point to the
160172 // parent call frame.
161- let frame = sp;
162- push ( & mut sp , trampoline_2 as usize + 4 ) ; // Entry point, skip initial nop
163- push ( & mut sp , frame. 0 as usize ) ; // Pointer to parent call frame
173+ let frame = sp. offset ( 0 ) ;
174+ sp . push ( trampoline_2 as usize + 4 ) ; // Entry point, skip initial nop
175+ sp . push ( frame as usize ) ; // Pointer to parent call frame
164176
165177 sp
166178}
167179
168180#[ inline( always) ]
169- pub unsafe fn swap ( arg : usize , new_sp : StackPointer ,
170- new_stack : Option < & Stack > ) -> ( usize , StackPointer ) {
181+ pub unsafe fn swap_link < Stack : stack :: Stack > ( arg : usize , new_sp : StackPointer ,
182+ new_stack : & Stack ) -> ( usize , Option < StackPointer > ) {
171183 // Address of the topmost CFA stack slot.
172- let mut dummy: usize = mem:: uninitialized ( ) ;
173- let new_cfa = if let Some ( new_stack) = new_stack {
174- ( new_stack. base ( ) as * mut usize ) . offset ( -4 )
175- } else {
176- // Just pass a dummy pointer if we aren't linking the stack
177- & mut dummy
178- } ;
184+ let new_cfa = StackPointer :: stack_base ( new_stack) . offset ( -4 ) ;
179185
180186 #[ naked]
181187 unsafe extern "C" fn trampoline ( ) {
@@ -213,7 +219,7 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
213219 }
214220
215221 let ret: usize ;
216- let ret_sp: * mut usize ;
222+ let ret_sp: usize ;
217223 asm ! (
218224 r#"
219225 # Call the trampoline to switch to the new context.
@@ -240,5 +246,67 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
240246 // the "alignstack" LLVM inline assembly option does exactly the same
241247 // thing on AArch64.
242248 : "volatile" , "alignstack" ) ;
243- ( ret, StackPointer ( ret_sp) )
249+ ( ret, mem:: transmute ( ret_sp) )
250+ }
251+
/// Switches execution to the context whose saved stack pointer is `new_sp`,
/// passing `arg` through in x0. This call "returns" only when some other
/// context later switches back to us; the returned pair is the argument that
/// context passed and its stack pointer at the moment it switched away.
///
/// NOTE(review): unlike `swap_link`, this variant takes no `new_stack`
/// argument and does not compute/update a CFA slot on the target stack —
/// presumably intended for switches between contexts that are already
/// linked for unwinding; confirm against callers.
///
/// # Safety
/// `new_sp` must be a valid stack pointer previously produced by `init` or
/// handed back by a prior swap, and the context it denotes must be suspended
/// (not currently executing on any thread).
#[inline(always)]
pub unsafe fn swap(arg: usize, new_sp: StackPointer) -> (usize, StackPointer) {
    // Naked trampoline that performs the actual switch: it saves our frame
    // pointer and link register (with CFI directives so unwinders can walk
    // across the switch point), publishes our old sp to the new context in
    // x1, installs the new sp from x2, restores the new context's saved
    // x29/x30 pair, and branches into it.
    #[naked]
    unsafe extern "C" fn trampoline() {
        asm!(
            r#"
        # Save the frame pointer and link register; the unwinder uses them to find
        # the CFA of the caller, and so they have to have the correct value immediately
        # after the call instruction that invoked the trampoline.
        stp x29, x30, [sp, #-16]!
        .cfi_adjust_cfa_offset 16
        .cfi_rel_offset x30, 8
        .cfi_rel_offset x29, 0

        # Pass the stack pointer of the old context to the new one.
        mov x1, sp
        # Load stack pointer of the new context.
        mov sp, x2

        # Load frame and instruction pointers of the new context.
        ldp x29, x30, [sp], #16
        .cfi_adjust_cfa_offset -16
        .cfi_restore x29
        .cfi_restore x30

        # Return into the new context. Use `br` instead of a `ret` to avoid
        # return address mispredictions.
        br x30
      "#
            : : : : "volatile")
    }

    let ret: usize;
    let ret_sp: usize;
    asm!(
        r#"
        # Call the trampoline to switch to the new context.
        bl ${2}
      "#
        : "={x0}" (ret)
          "={x1}" (ret_sp)
        : "s" (trampoline as usize)
          "{x0}" (arg)
          "{x2}" (new_sp.0)
        // Everything except x0 (result), x1 (result sp), fp and sp is
        // clobbered: the new context may run arbitrary code before anyone
        // switches back to us, so no register contents survive the call.
        : /*x0, "x1",*/ "x2", "x3", "x4", "x5", "x6", "x7",
          "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", /*fp,*/ "lr", /*sp,*/
          "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
          "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
          "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
          "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
          "cc", "memory"
        // Ideally, we would set the LLVM "noredzone" attribute on this function
        // (and it would be propagated to the call site). Unfortunately, rustc
        // provides no such functionality. Fortunately, by a lucky coincidence,
        // the "alignstack" LLVM inline assembly option does exactly the same
        // thing on AArch64.
        : "volatile", "alignstack");
    // The resuming context's stack pointer comes back in x1 as a raw usize;
    // reinterpret it as a StackPointer so the caller can switch back later.
    (ret, mem::transmute(ret_sp))
}