Skip to content

Commit 7e2dd59

Browse files
filipnavara and VSadov authored
[NativeAOT] Linux/ARM bring-up (4/n) (#97269)
* Fix recursive generics for ARM
* Fix compilation on Debian Bookworm
* Implement thread return address hijacking for ARM
* Implement TrailingEpilogueInstructionsCount for ARM
* Fix comment
* Fix bugs in RhpGcProbeHijack logic
* Fix register trashing by INLINE_GETTHREAD in FixupHijackedCallstack on ARM
* Mask the Thumb bit when loading IP from probe frame
* Disable DwarfDump on linux-arm
* Cleanup
* Emit DWARF info with instruction addresses without Thumb bit (matches clang)
* Report R2/R3 registers in ForEachPossibleObjectRef
* Ensure that PInvokeTransitionFrame(s) on the stack are 8-byte aligned. Save FP return values on hijack.
* Tame the Thumb bit
* Fix GC hole when thread hijack happens with r0 register holding a reference (e.g. boxed int)
* Update src/coreclr/nativeaot/Runtime/arm/GcProbe.S

---------

Co-authored-by: Vladimir Sadov <[email protected]>
1 parent 758f34d commit 7e2dd59

File tree

17 files changed

+417
-36
lines changed

17 files changed

+417
-36
lines changed

src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,12 @@ COOP_PINVOKE_HELPER(uint8_t *, RhGetRuntimeVersion, (int32_t* pcbLength))
6565

6666
// Returns the method start address that RuntimeInstance::FindMethodStartAddress reports for
// codeAddr. In the added (+) lines the ARM build adds 1 to that address to set the Thumb bit;
// every other target returns the address unchanged.
// NOTE(review): if FindMethodStartAddress can return NULL, the ARM path yields 1 rather than
// NULL -- confirm how callers detect "not found".
COOP_PINVOKE_HELPER(uint8_t *, RhFindMethodStartAddress, (void * codeAddr))
6767
{
68-
return dac_cast<uint8_t *>(GetRuntimeInstance()->FindMethodStartAddress(dac_cast<PTR_VOID>(codeAddr)));
68+
uint8_t *startAddress = dac_cast<uint8_t *>(GetRuntimeInstance()->FindMethodStartAddress(dac_cast<PTR_VOID>(codeAddr)));
69+
#if TARGET_ARM
70+
return startAddress + 1; // Set the Thumb bit
71+
#else
72+
return startAddress;
73+
#endif
6974
}
7075

7176
PTR_UInt8 RuntimeInstance::FindMethodStartAddress(PTR_VOID ControlPC)

src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "threadstore.inl"
2222
#include "thread.inl"
2323
#include "stressLog.h"
24+
#include "CommonMacros.inl"
2425

2526
#include "shash.h"
2627
#include "RuntimeInstance.h"
@@ -75,7 +76,7 @@ GVAL_IMPL_INIT(PTR_VOID, g_RhpRethrow2Addr, PointerToRhpRethrow2);
7576
// EQUALS_RETURN_ADDRESS(x, func_name): true when x equals the known address for func_name.
// DAC builds compare against the g_<func_name>Addr global. Other builds compare against
// PointerTo<func_name>; the added (+) definition first passes that pointer through
// PCODEToPINSTR (presumably to drop the ARM Thumb bit so that both sides are plain
// instruction addresses -- TODO confirm against the PCODEToPINSTR definition).
#ifdef DACCESS_COMPILE
7677
#define EQUALS_RETURN_ADDRESS(x, func_name) ((x) == g_ ## func_name ## Addr)
7778
#else
78-
#define EQUALS_RETURN_ADDRESS(x, func_name) (((x)) == (PointerTo ## func_name))
79+
#define EQUALS_RETURN_ADDRESS(x, func_name) (((x)) == (PTR_VOID)PCODEToPINSTR((PCODE)PointerTo ## func_name))
8081
#endif
8081

8182
#ifdef DACCESS_COMPILE
@@ -178,7 +179,7 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF
178179

179180
#if !defined(USE_PORTABLE_HELPERS) // @TODO: no portable version of regdisplay
180181
memset(&m_RegDisplay, 0, sizeof(m_RegDisplay));
181-
m_RegDisplay.SetIP((PCODE)pFrame->m_RIP);
182+
m_RegDisplay.SetIP((PCODE)PCODEToPINSTR((PCODE)pFrame->m_RIP));
182183
SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
183184

184185
PTR_UIntNative pPreservedRegsCursor = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_PreservedRegs);
@@ -384,9 +385,9 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO
384385
//
385386
// control state
386387
//
387-
SetControlPC(dac_cast<PTR_VOID>(pCtx->GetIp()));
388388
m_RegDisplay.SP = pCtx->GetSp();
389-
m_RegDisplay.IP = pCtx->GetIp();
389+
m_RegDisplay.IP = PCODEToPINSTR(pCtx->GetIp());
390+
SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
390391

391392
#ifdef TARGET_ARM
392393
//
@@ -609,6 +610,8 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC
609610

610611
m_RegDisplay.pR0 = (PTR_UIntNative)PTR_TO_REG(pCtx, R0);
611612
m_RegDisplay.pR1 = (PTR_UIntNative)PTR_TO_REG(pCtx, R1);
613+
m_RegDisplay.pR2 = (PTR_UIntNative)PTR_TO_REG(pCtx, R2);
614+
m_RegDisplay.pR3 = (PTR_UIntNative)PTR_TO_REG(pCtx, R3);
612615
m_RegDisplay.pR4 = (PTR_UIntNative)PTR_TO_REG(pCtx, R4);
613616
m_RegDisplay.pR5 = (PTR_UIntNative)PTR_TO_REG(pCtx, R5);
614617
m_RegDisplay.pR6 = (PTR_UIntNative)PTR_TO_REG(pCtx, R6);
@@ -991,7 +994,7 @@ void StackFrameIterator::UnwindFuncletInvokeThunk()
991994
#endif
992995

993996
#if !defined(TARGET_ARM64)
994-
m_RegDisplay.SetIP(*SP++);
997+
m_RegDisplay.SetIP(PCODEToPINSTR(*SP++));
995998
#endif
996999

9971000
m_RegDisplay.SetSP((uintptr_t)dac_cast<TADDR>(SP));
@@ -1174,7 +1177,7 @@ void StackFrameIterator::UnwindUniversalTransitionThunk()
11741177
stackFrame->UnwindNonVolatileRegisters(&m_RegDisplay);
11751178

11761179
PTR_UIntNative addressOfPushedCallerIP = stackFrame->get_AddressOfPushedCallerIP();
1177-
m_RegDisplay.SetIP(*addressOfPushedCallerIP);
1180+
m_RegDisplay.SetIP(PCODEToPINSTR(*addressOfPushedCallerIP));
11781181
m_RegDisplay.SetSP((uintptr_t)dac_cast<TADDR>(stackFrame->get_CallerSP()));
11791182
SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
11801183

@@ -1265,9 +1268,9 @@ void StackFrameIterator::UnwindThrowSiteThunk()
12651268
ASSERT_UNCONDITIONALLY("NYI for this arch");
12661269
#endif
12671270

1268-
m_RegDisplay.SetIP(pContext->IP);
1271+
m_RegDisplay.SetIP(PCODEToPINSTR(pContext->IP));
12691272
m_RegDisplay.SetSP(pContext->GetSp());
1270-
SetControlPC(dac_cast<PTR_VOID>(pContext->IP));
1273+
SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
12711274

12721275
// We expect the throw site to be in managed code, and since this function's notion of how to unwind
12731276
// through the stub is brittle relative to the stub itself, we want to check as soon as we can.
@@ -1357,7 +1360,7 @@ void StackFrameIterator::NextInternal()
13571360
// if the thread is safe to walk, it better not have a hijack in place.
13581361
ASSERT(!m_pThread->IsHijacked());
13591362

1360-
SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
1363+
SetControlPC(dac_cast<PTR_VOID>(PCODEToPINSTR(m_RegDisplay.GetIP())));
13611364

13621365
PTR_VOID collapsingTargetFrame = NULL;
13631366

@@ -1717,6 +1720,11 @@ bool StackFrameIterator::GetHijackedReturnValueLocation(PTR_OBJECTREF * pLocatio
17171720

17181721
// Records controlPC as both the current (m_ControlPC) and the original (m_OriginalControlPC)
// control PC of this iterator. On ARM builds it first asserts that bit 0 of controlPC is clear.
void StackFrameIterator::SetControlPC(PTR_VOID controlPC)
17191722
{
1723+
#if TARGET_ARM
1724+
// Ensure that PC doesn't have the Thumb bit set. This needs to be
1725+
// consistent for EQUALS_RETURN_ADDRESS to work.
1726+
ASSERT(((uintptr_t)controlPC & 1) == 0);
1727+
#endif
17201728
m_OriginalControlPC = m_ControlPC = controlPC;
17211729
}
17221730

src/coreclr/nativeaot/Runtime/arm/GcProbe.S

Lines changed: 109 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,105 @@
88
#include "AsmOffsets.inc"
99

1010
.global RhpGcPoll2
11+
.global RhpThrowHwEx
12+
13+
// See PUSH_COOP_PINVOKE_FRAME. This macro is very similar, but it also saves the return registers
14+
// (r0, r1 and d0-d3) and accepts the frame flags bitmask as an argument.
15+
// Call this macro first in the method (no further prolog instructions can be added after this).
16+
//
17+
// threadReg : register containing the Thread* (this will be preserved).
18+
// trashReg : register that can be trashed by this macro
19+
// BITMASK : value to initialize the frame's m_Flags field with (register or #constant)
20+
.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK
21+
// Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving
22+
// incoming register values into it.
23+
PROLOG_VPUSH "{d0-d3}" // Save d0-d3 which can have the floating point return value
24+
PROLOG_STACK_ALLOC 4 // Padding for 8-byte alignment
25+
PROLOG_PUSH "{r0,r1}" // Save return registers
26+
PROLOG_STACK_ALLOC 4 // Space for caller's SP
27+
PROLOG_PUSH "{r4-r10}" // Save non-volatile registers
28+
PROLOG_STACK_ALLOC 8 // Space for flags and Thread*
29+
PROLOG_PUSH "{r11}" // Save caller's frame pointer
30+
PROLOG_PUSH "{r11,lr}" // Save frame-chain pointer and return address
31+
32+
str \threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread]
33+
mov \trashReg, \BITMASK
34+
str \trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
35+
36+
// Compute SP value at entry to this method and save it in slot of the frame.
37+
add \trashReg, sp, #(16 * 4 + 4 * 8) // 16 four-byte slots pushed/allocated above + four 8-byte d-registers
38+
str \trashReg, [sp, #(12 * 4)] // caller's SP slot: above {r11,lr}, r11, flags/Thread* and r4-r10 (12 words)
39+
40+
// Link the frame into the Thread
41+
str sp, [\threadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame]
42+
.endm
43+
44+
//
45+
// Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved
46+
// registers and return value to their values from before the probe was called (while also updating any
47+
// object refs or byrefs). Slots are released in exactly the reverse order in which PUSH_PROBE_FRAME
48+
// created them; afterwards lr holds the return address that was saved in the frame.
49+
.macro POP_PROBE_FRAME
50+
EPILOG_POP "{r11,lr}" // Restore frame-chain pointer and return address
51+
EPILOG_POP "{r11}" // Restore caller's frame pointer
52+
EPILOG_STACK_FREE 8 // Discard flags and Thread*
53+
EPILOG_POP "{r4-r10}" // Restore non-volatile registers
54+
EPILOG_STACK_FREE 4 // Discard caller's SP
55+
EPILOG_POP "{r0,r1}" // Restore return registers
56+
EPILOG_STACK_FREE 4 // Discard padding for 8-byte alignment
57+
EPILOG_VPOP "{d0-d3}" // Restore d0-d3 which can have the floating point return value
58+
.endm
59+
60+
//
61+
// The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and
62+
// clears the hijack state.
63+
//
64+
// Register state on entry:
65+
// All registers correct for return to the original return address.
66+
//
67+
// Register state on exit:
68+
// r2: thread pointer
69+
// r3: trashed (set to zero)
// r12: value read from the thread's m_uHijackedReturnValueFlags before it was cleared
// lr: original (pre-hijack) return address read from the thread's m_pvHijackedReturnAddress
// r0, r1: preserved (saved and restored around INLINE_GETTHREAD)
70+
//
71+
.macro FixupHijackedCallstack
72+
push {r0, r1} // keep the (possible) return values safe while r0 is used for the thread pointer
73+
74+
// r0 <- GetThread()
75+
INLINE_GETTHREAD
76+
77+
mov r2, r0
78+
pop {r0, r1}
79+
80+
// Restore the original return address into lr and fetch the saved return value flags into r12
81+
ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
82+
ldr r12, [r2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags]
83+
84+
// Clear hijack state
85+
mov r3, #0
86+
str r3, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation]
87+
str r3, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
88+
str r3, [r2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags]
89+
.endm
90+
91+
// Pushes a probe frame, hands it to RhpWaitForGC2, then either returns to lr or raises a thread abort.
// Expected on entry (as set up by RhpGcProbeHijack before it branches here):
// r2 = Thread*, r12 = flags to store in the frame, lr = address to return to.
// r3 is used as scratch by PUSH_PROBE_FRAME.
NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
92+
PUSH_PROBE_FRAME r2, r3, r12
93+
94+
ldr r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] // argument: the frame that was just linked into the thread
95+
bl RhpWaitForGC2
96+
97+
ldr r2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] // re-read the frame flags after the call
98+
tst r2, #PTFF_THREAD_ABORT
99+
bne LOCAL_LABEL(ThrowThreadAbort)
100+
101+
POP_PROBE_FRAME
102+
bx lr
103+
104+
LOCAL_LABEL(ThrowThreadAbort):
105+
POP_PROBE_FRAME
106+
mov r0, #STATUS_REDHAWK_THREAD_ABORT
107+
mov r1, lr // return address as exception PC
108+
b C_FUNC(RhpThrowHwEx)
109+
NESTED_END RhpWaitForGC
11110

12111
LEAF_ENTRY RhpGcPoll
13112
PREPARE_EXTERNAL_VAR_INDIRECT RhpTrapThreads, r0
@@ -24,8 +123,16 @@ NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler
24123
NESTED_END RhpGcPollRare
25124

26125
// Hijack target. The added (+) lines replace the former breakpoint stub.
// After FixupHijackedCallstack, lr holds the original return address, r2 the Thread* and r12 the
// saved return value flags. If the TrapThreads flag is not set in RhpTrapThreads, control returns
// straight to lr. Otherwise the frame flags are completed in r12 and control branches to RhpWaitForGC.
NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
27-
// Not implemented
28-
EMIT_BREAKPOINT
126+
FixupHijackedCallstack
127+
128+
PREPARE_EXTERNAL_VAR_INDIRECT RhpTrapThreads, r3
129+
tst r3, #TrapThreadsFlags_TrapThreads
130+
bne LOCAL_LABEL(WaitForGC)
131+
bx lr // no trap requested: return to the original return address
132+
LOCAL_LABEL(WaitForGC):
133+
mov r3, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0)
134+
orr r12, r3 // combine with the return value flags loaded by FixupHijackedCallstack
135+
b RhpWaitForGC // branch (not call), so lr still holds the original return address
29136
NESTED_END RhpGcProbeHijack
30137

31138
#ifdef FEATURE_GC_STRESS

src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,8 @@
256256
#define MCREG_Lr(mc) ((mc).arm_lr)
257257
#define MCREG_R0(mc) ((mc).arm_r0)
258258
#define MCREG_R1(mc) ((mc).arm_r1)
259+
#define MCREG_R2(mc) ((mc).arm_r2)
260+
#define MCREG_R3(mc) ((mc).arm_r3)
259261
#define MCREG_R4(mc) ((mc).arm_r4)
260262
#define MCREG_R5(mc) ((mc).arm_r5)
261263
#define MCREG_R6(mc) ((mc).arm_r6)
@@ -514,6 +516,8 @@ uint64_t GetPC(void* context)
514516
uint64_t& UNIX_CONTEXT::Lr(){ return (uint64_t&)MCREG_Lr(ctx.uc_mcontext); }
515517
uint64_t& UNIX_CONTEXT::R0(){ return (uint64_t&)MCREG_R0(ctx.uc_mcontext); }
516518
uint64_t& UNIX_CONTEXT::R1(){ return (uint64_t&)MCREG_R1(ctx.uc_mcontext); }
519+
uint64_t& UNIX_CONTEXT::R2(){ return (uint64_t&)MCREG_R2(ctx.uc_mcontext); }
520+
uint64_t& UNIX_CONTEXT::R3(){ return (uint64_t&)MCREG_R3(ctx.uc_mcontext); }
517521
uint64_t& UNIX_CONTEXT::R4(){ return (uint64_t&)MCREG_R4(ctx.uc_mcontext); }
518522
uint64_t& UNIX_CONTEXT::R5(){ return (uint64_t&)MCREG_R5(ctx.uc_mcontext); }
519523
uint64_t& UNIX_CONTEXT::R6(){ return (uint64_t&)MCREG_R6(ctx.uc_mcontext); }

src/coreclr/nativeaot/Runtime/unix/UnixContext.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ struct UNIX_CONTEXT
126126
uint64_t& Lr();
127127
uint64_t& R0();
128128
uint64_t& R1();
129+
uint64_t& R2();
130+
uint64_t& R3();
129131
uint64_t& R4();
130132
uint64_t& R5();
131133
uint64_t& R6();
@@ -143,6 +145,8 @@ struct UNIX_CONTEXT
143145
{
144146
lambda((size_t*)&R0());
145147
lambda((size_t*)&R1());
148+
lambda((size_t*)&R2());
149+
lambda((size_t*)&R3());
146150
lambda((size_t*)&R4());
147151
lambda((size_t*)&R5());
148152
lambda((size_t*)&R6());

0 commit comments

Comments
 (0)