// from the stack frame at x29 (in the parent stack), thus continuing
// unwinding at the swap call site instead of falling off the end of context stack.
4949use core:: mem;
50- use stack:: Stack ;
50+ use stack;
51+ use arch:: StackPointer ;
5152
5253pub const STACK_ALIGNMENT : usize = 16 ;
5354
54- #[ derive( Debug , Clone , Copy ) ]
55- pub struct StackPointer ( * mut usize ) ;
56-
57- pub unsafe fn init ( stack : & Stack , f : unsafe extern "C" fn ( usize , StackPointer ) -> !) -> StackPointer {
55+ pub unsafe fn init < Stack : stack:: Stack > ( stack : & Stack , f : unsafe extern "C" fn ( usize , StackPointer ) ) -> StackPointer {
5856 #[ cfg( not( target_vendor = "apple" ) ) ]
5957 #[ naked]
6058 unsafe extern "C" fn trampoline_1 ( ) {
@@ -129,15 +127,29 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -
129127 # Call the provided function.
130128 ldr x2, [sp, #16]
131129 blr x2
130+
131+ # Clear the stack pointer. We can't call into this context any more once
132+ # the function has returned.
133+ mov x1, #0
134+
135+ # Restore the stack pointer of the parent context. No CFI adjustments
136+ # are needed since we have the same stack frame as trampoline_1.
137+ ldr x2, [sp]
138+ mov sp, x2
139+
140+ # Load frame and instruction pointers of the parent context.
141+ ldp x29, x30, [sp], #16
142+ .cfi_adjust_cfa_offset -16
143+ .cfi_restore x29
144+ .cfi_restore x30
145+
146+ # Return into the parent context. Use `br` instead of a `ret` to avoid
147+ # return address mispredictions.
148+ br x30
132149 "#
133150 : : : : "volatile" )
134151 }
135152
136- unsafe fn push ( sp : & mut StackPointer , val : usize ) {
137- sp. 0 = sp. 0 . offset ( -1 ) ;
138- * sp. 0 = val
139- }
140-
141153 // We set up the stack in a somewhat special way so that to the unwinder it
142154 // looks like trampoline_1 has called trampoline_2, which has in turn called
143155 // swap::trampoline.
@@ -146,36 +158,30 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -
146158 // followed by the x29 value for that frame. This setup supports unwinding
147159 // using DWARF CFI as well as the frame pointer-based unwinding used by tools
148160 // such as perf or dtrace.
149- let mut sp = StackPointer ( stack. base ( ) as * mut usize ) ;
161+ let mut sp = StackPointer :: stack_base ( stack) ;
150162
151- push ( & mut sp , 0 as usize ) ; // Padding to ensure the stack is properly aligned
152- push ( & mut sp , f as usize ) ; // Function that trampoline_2 should call
163+ sp . push ( 0 as usize ) ; // Padding to ensure the stack is properly aligned
164+ sp . push ( f as usize ) ; // Function that trampoline_2 should call
153165
154166 // Call frame for trampoline_2. The CFA slot is updated by swap::trampoline
155167 // each time a context switch is performed.
156- push ( & mut sp , trampoline_1 as usize + 4 ) ; // Return after the nop
157- push ( & mut sp , 0xdeaddeaddead0cfa ) ; // CFA slot
168+ sp . push ( trampoline_1 as usize + 4 ) ; // Return after the nop
169+ sp . push ( 0xdeaddeaddead0cfa ) ; // CFA slot
158170
159171 // Call frame for swap::trampoline. We set up the x29 value to point to the
160172 // parent call frame.
161- let frame = sp;
162- push ( & mut sp , trampoline_2 as usize + 4 ) ; // Entry point, skip initial nop
163- push ( & mut sp , frame. 0 as usize ) ; // Pointer to parent call frame
173+ let frame = sp. offset ( 0 ) ;
174+ sp . push ( trampoline_2 as usize + 4 ) ; // Entry point, skip initial nop
175+ sp . push ( frame as usize ) ; // Pointer to parent call frame
164176
165177 sp
166178}
167179
168180#[ inline( always) ]
169- pub unsafe fn swap ( arg : usize , new_sp : StackPointer ,
170- new_stack : Option < & Stack > ) -> ( usize , StackPointer ) {
181+ pub unsafe fn swap_link < Stack : stack :: Stack > ( arg : usize , new_sp : StackPointer ,
182+ new_stack : & Stack ) -> ( usize , Option < StackPointer > ) {
171183 // Address of the topmost CFA stack slot.
172- let mut dummy: usize = mem:: uninitialized ( ) ;
173- let new_cfa = if let Some ( new_stack) = new_stack {
174- ( new_stack. base ( ) as * mut usize ) . offset ( -4 )
175- } else {
176- // Just pass a dummy pointer if we aren't linking the stack
177- & mut dummy
178- } ;
184+ let new_cfa = StackPointer :: stack_base ( new_stack) . offset ( -4 ) ;
179185
180186 #[ naked]
181187 unsafe extern "C" fn trampoline ( ) {
@@ -213,7 +219,7 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
213219 }
214220
215221 let ret: usize ;
216- let ret_sp: * mut usize ;
222+ let ret_sp: usize ;
217223 asm ! (
218224 r#"
219225 # Call the trampoline to switch to the new context.
@@ -240,5 +246,67 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
240246 // the "alignstack" LLVM inline assembly option does exactly the same
241247 // thing on AArch64.
242248 : "volatile" , "alignstack" ) ;
243- ( ret, StackPointer ( ret_sp) )
249+ ( ret, mem:: transmute ( ret_sp) )
250+ }
251+
/// Switches execution to the context whose saved stack pointer is `new_sp`,
/// passing `arg` through in x0. This call "returns" only when some other
/// context later switches back to us; the returned pair is the argument that
/// context passed and its stack pointer at the moment it switched away.
///
/// NOTE(review): unlike `swap_link`, this variant takes no `new_stack`
/// argument and does not compute/update a CFA slot on the target stack —
/// presumably intended for switches between contexts that are already
/// linked for unwinding; confirm against callers.
///
/// # Safety
/// `new_sp` must be a valid stack pointer previously produced by `init` or
/// handed back by a prior swap, and the context it denotes must be suspended
/// (not currently executing on any thread).
#[inline(always)]
pub unsafe fn swap(arg: usize, new_sp: StackPointer) -> (usize, StackPointer) {
    // Naked trampoline that performs the actual switch: it saves our frame
    // pointer and link register (with CFI directives so unwinders can walk
    // across the switch point), publishes our old sp to the new context in
    // x1, installs the new sp from x2, restores the new context's saved
    // x29/x30 pair, and branches into it.
    #[naked]
    unsafe extern "C" fn trampoline() {
        asm!(
            r#"
        # Save the frame pointer and link register; the unwinder uses them to find
        # the CFA of the caller, and so they have to have the correct value immediately
        # after the call instruction that invoked the trampoline.
        stp x29, x30, [sp, #-16]!
        .cfi_adjust_cfa_offset 16
        .cfi_rel_offset x30, 8
        .cfi_rel_offset x29, 0

        # Pass the stack pointer of the old context to the new one.
        mov x1, sp
        # Load stack pointer of the new context.
        mov sp, x2

        # Load frame and instruction pointers of the new context.
        ldp x29, x30, [sp], #16
        .cfi_adjust_cfa_offset -16
        .cfi_restore x29
        .cfi_restore x30

        # Return into the new context. Use `br` instead of a `ret` to avoid
        # return address mispredictions.
        br x30
      "#
            : : : : "volatile")
    }

    let ret: usize;
    let ret_sp: usize;
    asm!(
        r#"
        # Call the trampoline to switch to the new context.
        bl ${2}
      "#
        : "={x0}" (ret)
          "={x1}" (ret_sp)
        : "s" (trampoline as usize)
          "{x0}" (arg)
          "{x2}" (new_sp.0)
        // Everything except x0 (result), x1 (result sp), fp and sp is
        // clobbered: the new context may run arbitrary code before anyone
        // switches back to us, so no register contents survive the call.
        : /*x0, "x1",*/ "x2", "x3", "x4", "x5", "x6", "x7",
          "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", /*fp,*/ "lr", /*sp,*/
          "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
          "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
          "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
          "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
          "cc", "memory"
        // Ideally, we would set the LLVM "noredzone" attribute on this function
        // (and it would be propagated to the call site). Unfortunately, rustc
        // provides no such functionality. Fortunately, by a lucky coincidence,
        // the "alignstack" LLVM inline assembly option does exactly the same
        // thing on AArch64.
        : "volatile", "alignstack");
    // The resuming context's stack pointer comes back in x1 as a raw usize;
    // reinterpret it as a StackPointer so the caller can switch back later.
    (ret, mem::transmute(ret_sp))
}