Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 52e146e

Browse files
committed
implement profiler ELT callbacks for AMD64 Linux
1 parent 32fa491 commit 52e146e

File tree

4 files changed

+183
-13
lines changed

4 files changed

+183
-13
lines changed

src/jit/codegencommon.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7298,13 +7298,15 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
72987298
return;
72997299
}
73007300

7301-
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
7301+
#if defined(_TARGET_AMD64_)
73027302
unsigned varNum;
73037303
LclVarDsc* varDsc;
73047304

73057305
// Since the method needs to make a profiler callback, it should have out-going arg space allocated.
73067306
noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
7307+
#if !defined(UNIX_AMD64_ABI)
73077308
noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
7309+
#endif
73087310

73097311
// Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
73107312
// In case of vararg methods, arg regs are already homed.
@@ -7367,7 +7369,11 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
73677369
// This will emit either
73687370
// "call ip-relative 32-bit offset" or
73697371
// "mov rax, helper addr; call rax"
7372+
#if !defined(UNIX_AMD64_ABI)
73707373
genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
7374+
#else
7375+
genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN, REG_ARG_2);
7376+
#endif
73717377

73727378
// TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
73737379
// generation logic that moves args around as required by first BB entry point conditions
@@ -7526,11 +7532,12 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
75267532
// Need to save on to the stack level, since the helper call will pop the argument
75277533
unsigned saveStackLvl2 = genStackLevel;
75287534

7529-
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
7530-
7535+
#if defined(_TARGET_AMD64_)
75317536
// Since the method needs to make a profiler callback, it should have out-going arg space allocated.
75327537
noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
7538+
#if !defined(UNIX_AMD64_ABI)
75337539
noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
7540+
#endif
75347541

75357542
// If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
75367543
// registers that profiler callback kills.

src/vm/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ else(WIN32)
352352

353353
if(CLR_CMAKE_TARGET_ARCH_AMD64)
354354
set(VM_SOURCES_WKS_ARCH_ASM
355+
${ARCH_SOURCES_DIR}/asmhelpers.S
355356
${ARCH_SOURCES_DIR}/calldescrworkeramd64.S
356357
${ARCH_SOURCES_DIR}/crthelpers.S
357358
${ARCH_SOURCES_DIR}/externalmethodfixupthunk.S

src/vm/amd64/asmhelpers.S

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
.intel_syntax noprefix
6+
#include "unixasmmacros.inc"
7+
#include "asmconstants.h"
8+
9+
#define real4 dword
10+
#define real8 qword
11+
12+
//
13+
// typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA
14+
// {
15+
// FunctionID *functionId; // function ID comes in the r11 register
16+
// void *rbp;
17+
// void *probersp;
18+
// void *ip;
19+
// void *profiledRsp;
20+
// UINT64 rax;
21+
// LPVOID hiddenArg;
22+
// UINT64 flt0;
23+
// UINT64 flt1;
24+
// UINT64 flt2;
25+
// UINT64 flt3;
26+
// UINT32 flags;
27+
// } PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA;
28+
//
29+
// Size of PROFILE_PLATFORM_SPECIFIC_DATA as laid out on the probe's stack frame:
// eleven 8-byte fields (functionId .. flt3) plus the 4-byte flags field and 4 bytes
// of padding, i.e. 0x60 bytes, so that the FP spill area that follows it stays
// 16-byte aligned.
.equ SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA, 0x8*11 + 0x4*2 // includes fudge to make FP_SPILL right
30+
// Six 8-byte slots reserved at the bottom of the frame.
// NOTE(review): presumably one slot per System V integer argument register -- confirm.
.equ SIZEOF_OUTGOING_ARGUMENT_HOMES, 0x8*6
31+
// One 16-byte slot; the probes below spill xmm0 into it.
// NOTE(review): this symbol is not referenced by any other line visible in this file.
.equ SIZEOF_FP_ARG_SPILL, 0x10*1
32+
33+
// Need to be careful to keep the stack 16byte aligned here, since we are pushing 3
34+
// arguments that will align the stack and we just want to keep it aligned with our
35+
// SIZEOF_STACK_FRAME
36+
37+
// The structure sits directly above the outgoing argument homes (offset 0x30 from rsp).
.equ OFFSETOF_PLATFORM_SPECIFIC_DATA, SIZEOF_OUTGOING_ARGUMENT_HOMES
38+
39+
// we'll just spill into the PROFILE_PLATFORM_SPECIFIC_DATA structure
// (the spill slot starts right after the structure, at offset 0x30 + 0x60 = 0x90)
40+
.equ OFFSETOF_FP_ARG_SPILL, SIZEOF_OUTGOING_ARGUMENT_HOMES + SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA
41+
42+
// Total number of bytes the probes allocate with alloc_stack.
// NOTE(review): SIZEOF_MAX_FP_ARG_SPILL is not defined in this file; presumably it comes
// from asmconstants.h -- confirm it is defined and is a multiple of 16.
.equ SIZEOF_STACK_FRAME, SIZEOF_OUTGOING_ARGUMENT_HOMES + SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA + SIZEOF_MAX_FP_ARG_SPILL
43+
44+
// Values for the flags field of PROFILE_PLATFORM_SPECIFIC_DATA.
// The probes below store the literals 0x1 / 0x2 rather than these symbols.
.equ PROFILE_ENTER, 0x1
45+
.equ PROFILE_LEAVE, 0x2
46+
.equ PROFILE_TAILCALL, 0x4
47+
48+
// ***********************************************************
49+
// NOTE:
50+
//
51+
// Register preservation scheme:
52+
//
53+
// Preserved:
54+
// - all non-volatile registers
55+
// - rax
56+
// - xmm0
57+
//
58+
// Not Preserved:
59+
// - integer argument registers (rdi, rsi, rdx, rcx, r8, r9)
60+
// - floating point argument registers (xmm1-7)
61+
// - volatile integer registers (r10, r11)
62+
// - volatile floating point registers (xmm8-15)
63+
//
64+
// ***********************************************************
65+
66+
// EXTERN_C void ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID, size_t profiledRsp);
67+
// <NOTE>
68+
// Enter probe. Builds a PROFILE_PLATFORM_SPECIFIC_DATA structure on its own stack
// frame and calls ProfileEnter(clientInfo, &structure). rax and xmm0 are saved
// before the call and restored afterwards. r8, r10 and rsi are overwritten by
// this stub itself.
// NOTE(review): r8 is an argument register under the System V ABI; presumably the
// JIT has homed incoming arguments before calling this probe -- confirm.
69+
// </NOTE>
70+
NESTED_ENTRY ProfileEnterNaked, _TEXT, NoHandler
71+
// Upon entry :
72+
// rdi = clientInfo
73+
// rsi = profiledRsp
74+
75+
// rax is pushed here and popped in the epilogue, so the caller sees it unchanged
76+
77+
push_nonvol_reg rax
78+
79+
lea rax, [rsp + 0x10] // caller rsp (skips the pushed rax and the return address)
80+
mov r10, [rax - 0x8] // return address
81+
82+
alloc_stack SIZEOF_STACK_FRAME
83+
84+
// correctness of return value in structure doesn't matter for enter probe
85+
86+
87+
// setup ProfilePlatformSpecificData structure
88+
xor r8, r8 // nullify r8
89+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x0], r8 // r8 is null -- struct functionId field
90+
save_reg_postrsp rbp, OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x8 // -- struct rbp field
91+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x10], rax // caller rsp -- struct probeRsp field
92+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x18], r10 // return address -- struct ip field
93+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x20], rsi // -- struct profiledRsp field
94+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x28], r8 // r8 is null (no return value on enter) -- struct rax field
95+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x30], r8 // r8 is null -- struct hiddenArg field
96+
movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x38], xmm0 // -- struct flt0 field
97+
movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x40], xmm1 // -- struct flt1 field
98+
movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x48], xmm2 // -- struct flt2 field
99+
movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x50], xmm3 // -- struct flt3 field
100+
mov r10, 0x1 // PROFILE_ENTER
101+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x58], r10d // flags ; -- struct flags field
102+
103+
// save all 128 bits of xmm0 so it can be restored after the call
104+
save_xmm128_postrsp xmm0, OFFSETOF_FP_ARG_SPILL + 0x0
105+
END_PROLOGUE
106+
107+
// rdi already contains the clientInfo
108+
lea rsi, [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA] // second argument: pointer to the structure built above
109+
call ProfileEnter
110+
111+
// restore xmm0 from the spill slot
112+
movdqa xmm0, [rsp + OFFSETOF_FP_ARG_SPILL + 0x0]
113+
114+
// begin epilogue
115+
free_stack SIZEOF_STACK_FRAME
116+
117+
pop_nonvol_reg rax
118+
ret
119+
NESTED_END ProfileEnterNaked, _TEXT
120+
121+
// EXTERN_C void ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID, size_t profiledRsp);
122+
// <NOTE>
123+
// Leave probe. Builds a PROFILE_PLATFORM_SPECIFIC_DATA structure on its own stack
// frame, records the profiled method's integer return value (rax) and xmm0 in it,
// and calls ProfileLeave(clientInfo, &structure). rax and xmm0 are restored before
// returning. r8, r10, r11 and rsi are overwritten by this stub itself.
124+
// </NOTE>
125+
NESTED_ENTRY ProfileLeaveNaked, _TEXT, NoHandler
126+
// Upon entry :
127+
// rdi = clientInfo
128+
// rsi = profiledRsp
129+
130+
// need to be careful with rax here because it contains the return value which we want to harvest
131+
push_nonvol_reg rax
132+
133+
lea rax, [rsp + 0x10] // caller rsp (skips the pushed rax and the return address)
134+
mov r10, [rax - 0x8] // return address
135+
136+
alloc_stack SIZEOF_STACK_FRAME
137+
138+
// correctness of argument registers in structure doesn't matter for leave probe
139+
140+
// setup ProfilePlatformSpecificData structure
141+
xor r8, r8 // nullify r8
142+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x0], r8 // r8 is null -- struct functionId field
143+
save_reg_postrsp rbp, OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x8 // -- struct rbp field
144+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x10], rax // caller rsp -- struct probeRsp field
145+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x18], r10 // return address -- struct ip field
146+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x20], rsi // -- struct profiledRsp field
// rax now holds the caller rsp, so reload the original rax (the return value)
// from the slot it was pushed to at the top of the prologue.
mov r11, [rax - 0x10] // saved rax = return value of the profiled method
147+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x28], r11 // return value -- struct rax field
148+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x30], r8 // r8 is null -- struct hiddenArg field
149+
movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x38], xmm0 // fp return value -- struct flt0 field
150+
movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x40], xmm1 // -- struct flt1 field
151+
movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x48], xmm2 // -- struct flt2 field
152+
movsd real8 ptr [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x50], xmm3 // -- struct flt3 field
153+
mov r10, 0x2 // PROFILE_LEAVE
154+
mov [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA + 0x58], r10d // flags -- struct flags field
155+
156+
// we need to be able to restore the fp return register
157+
save_xmm128_postrsp xmm0, OFFSETOF_FP_ARG_SPILL + 0x0
158+
END_PROLOGUE
159+
160+
// rdi already contains the clientInfo
161+
lea rsi, [rsp + OFFSETOF_PLATFORM_SPECIFIC_DATA] // second argument: pointer to the structure built above
162+
call ProfileLeave
163+
164+
// restore fp return register
165+
movdqa xmm0, [rsp + OFFSETOF_FP_ARG_SPILL + 0x0]
166+
167+
// begin epilogue
168+
free_stack SIZEOF_STACK_FRAME
169+
170+
pop_nonvol_reg rax // restore the integer return value for the profiled method's caller
171+
ret
172+
NESTED_END ProfileLeaveNaked, _TEXT

src/vm/amd64/unixstubs.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,6 @@ extern "C"
2626
PORTABILITY_ASSERT("Implement for PAL");
2727
}
2828

29-
void ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID)
30-
{
31-
PORTABILITY_ASSERT("Implement for PAL");
32-
}
33-
34-
void ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID)
35-
{
36-
PORTABILITY_ASSERT("Implement for PAL");
37-
}
38-
3929
void ProfileTailcallNaked(FunctionIDOrClientID functionIDOrClientID)
4030
{
4131
PORTABILITY_ASSERT("Implement for PAL");

0 commit comments

Comments
 (0)