Skip to content

Commit 22fc892

Browse files
committed
Allow Linux perf profiler to see Python calls
1 parent 6ec57e7 commit 22fc892

17 files changed

+350
-2
lines changed

Include/cpython/initconfig.h

+1
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ typedef struct PyConfig {
142142
unsigned long hash_seed;
143143
int faulthandler;
144144
int tracemalloc;
145+
int perf_profiling;
145146
int import_time;
146147
int code_debug_ranges;
147148
int show_ref_count;

Include/internal/pycore_ceval.h

+1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ extern PyObject* _PyEval_BuiltinsFromGlobals(
6565
PyThreadState *tstate,
6666
PyObject *globals);
6767

68+
extern int _PyPerfTrampoline_Init(int activate);
6869

6970
static inline PyObject*
7071
_PyEval_EvalFrame(PyThreadState *tstate, struct _PyInterpreterFrame *frame, int throwflag)

Makefile.pre.in

+6-1
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,9 @@ OBJECT_OBJS= \
475475
Objects/unicodeobject.o \
476476
Objects/unicodectype.o \
477477
Objects/unionobject.o \
478-
Objects/weakrefobject.o
478+
Objects/weakrefobject.o \
479+
Objects/perf_trampoline.o \
480+
@PERF_TRAMPOLINE_OBJ@
479481

480482
DEEPFREEZE_OBJS = Python/deepfreeze/deepfreeze.o
481483

@@ -2318,6 +2320,9 @@ config.status: $(srcdir)/configure
23182320

23192321
.PRECIOUS: config.status $(BUILDPYTHON) Makefile Makefile.pre
23202322

2323+
Objects/asm_trampoline.o: $(srcdir)/Objects/asm_trampoline.sx
2324+
$(CC) -c $(PY_CORE_CFLAGS) -o $@ $<
2325+
23212326
# Some make's put the object file in the current directory
23222327
.c.o:
23232328
$(CC) -c $(PY_CORE_CFLAGS) -o $@ $<
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Add a new ``-X perf`` Python command line option as well as the
2+
:func:`sys._activate_perf_trampoline` and
3+
:func:`sys._deactivate_perf_trampoline` functions in the :mod:`sys` module,
4+
which allow the interpreter to be set up (or reverted) so that the Linux ``perf``
5+
profiler can detect Python calls. Patch by Pablo Galindo.

Objects/asm_trampoline.sx

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Template for the perf trampolines.
//
// The bytes between _Py_trampoline_func_start and _Py_trampoline_func_end are
// copied verbatim into executable memory by Objects/perf_trampoline.c, so the
// code must be position independent (no PC-relative references).
//
// C-level signature of the trampoline:
//   PyObject *trampoline(py_evaluator evaluator, PyThreadState *ts,
//                        _PyInterpreterFrame *frame, int throwflag);
// It sets up a frame, shifts the last three arguments into the first three
// argument registers and calls evaluator(ts, frame, throwflag).
    .text
    .globl  _Py_trampoline_func_start
_Py_trampoline_func_start:
#ifdef __x86_64__
    // System V AMD64 ABI: rdi=evaluator, rsi=ts, rdx=frame, ecx=throwflag
    push    %rbp
    mov     %rsp,%rbp
    mov     %rdi,%rax
    mov     %rsi,%rdi
    mov     %rdx,%rsi
    mov     %ecx,%edx
    call    *%rax
    pop     %rbp
    ret
#endif // __x86_64__
#ifdef __aarch64__
    // AAPCS64: x0=evaluator, x1=ts, x2=frame, w3=throwflag
    stp     x29, x30, [sp, -16]!
    mov     x29, sp
    mov     x4, x0
    mov     x0, x1
    mov     x1, x2
    mov     w2, w3
    blr     x4
    ldp     x29, x30, [sp], 16
    ret
#endif // __aarch64__
    .globl  _Py_trampoline_func_end
_Py_trampoline_func_end:
    .section .note.GNU-stack,"",@progbits
21+

Objects/perf_trampoline.c

+165
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
#include "Python.h"
2+
#include "pycore_ceval.h"
3+
#include "pycore_frame.h"
4+
#include "pycore_interp.h"
5+
6+
#ifdef HAVE_PERF_TRAMPOLINE
7+
8+
#include <stdio.h>
9+
#include <stdlib.h>
10+
#include <sys/mman.h>
11+
#include <sys/types.h>
12+
#include <unistd.h>
13+
14+
typedef PyObject *(*py_evaluator)(PyThreadState *, _PyInterpreterFrame *,
15+
int throwflag);
16+
typedef PyObject *(*py_trampoline)(py_evaluator, PyThreadState *,
17+
_PyInterpreterFrame *, int throwflag);
18+
extern void *_Py_trampoline_func_start;
19+
extern void *_Py_trampoline_func_end;
20+
21+
/* Bookkeeping for the executable memory region that trampolines are
 * handed out from (filled in by new_code_arena()). */
typedef struct {
22+
char *start_addr;    /* base address of the mmap'ed region */
23+
char *current_addr;  /* address of the next trampoline slot to hand out */
24+
size_t size;         /* total size of the region in bytes (one page) */
25+
size_t size_left;    /* bytes not yet handed out; reduced by code_size per slot */
26+
size_t code_size;    /* size in bytes of one copy of the trampoline template */
27+
} code_arena_t;
28+
29+
static Py_ssize_t extra_code_index = -1;
30+
static code_arena_t code_arena;
31+
32+
static int
33+
new_code_arena()
34+
{
35+
size_t page_size = sysconf(_SC_PAGESIZE);
36+
char *memory = mmap(NULL, // address
37+
page_size, PROT_READ | PROT_WRITE | PROT_EXEC,
38+
MAP_PRIVATE | MAP_ANONYMOUS,
39+
-1, // fd (not used here)
40+
0); // offset (not used here)
41+
if (!memory) {
42+
Py_FatalError("Failed to allocate new code arena");
43+
return -1;
44+
}
45+
void *start = &_Py_trampoline_func_start;
46+
void *end = &_Py_trampoline_func_end;
47+
size_t code_size = end - start;
48+
49+
long n_copies = page_size / code_size;
50+
for (int i = 0; i < n_copies; i++) {
51+
memcpy(memory + i * code_size, start, code_size * sizeof(char));
52+
}
53+
54+
mprotect(memory, page_size, PROT_READ | PROT_EXEC);
55+
56+
code_arena.start_addr = memory;
57+
code_arena.current_addr = memory;
58+
code_arena.size = page_size;
59+
code_arena.size_left = page_size;
60+
code_arena.code_size = code_size;
61+
return 0;
62+
}
63+
64+
static inline py_trampoline
65+
code_arena_new_code(code_arena_t *code_arena)
66+
{
67+
py_trampoline trampoline = (py_trampoline)code_arena->current_addr;
68+
code_arena->size_left -= code_arena->code_size;
69+
code_arena->current_addr += code_arena->code_size;
70+
return trampoline;
71+
}
72+
73+
static inline py_trampoline
74+
compile_trampoline(void)
75+
{
76+
if (code_arena.size_left <= code_arena.code_size) {
77+
if (new_code_arena() < 0) {
78+
return NULL;
79+
}
80+
}
81+
82+
assert(code_arena.size_left <= code_arena.size);
83+
return code_arena_new_code(&code_arena);
84+
}
85+
86+
/*
 * Open /tmp/perf-<pid>.map in append mode and return the stream.
 * A failure to open the file is reported as a fatal error.
 */
static inline FILE *
perf_map_open(pid_t pid)
{
    char path[100];
    snprintf(path, sizeof(path), "/tmp/perf-%d.map", pid);

    FILE *fp = fopen(path, "a");
    if (fp != NULL) {
        return fp;
    }
    _Py_FatalErrorFormat(__func__, "Couldn't open %s: errno(%d)", path, errno);
    return NULL;
}
98+
99+
/*
 * Close a perf map stream.  A NULL stream is accepted and treated as
 * success; otherwise the result of fclose() is returned.
 */
static inline int
perf_map_close(FILE *fp)
{
    return (fp != NULL) ? fclose(fp) : 0;
}
107+
108+
/*
 * Append one line to the perf map stream, formatted as
 *   "<address in hex> <size in hex> py::<entry>:<file>\n"
 */
static void
perf_map_write_entry(FILE *method_file, const void *code_addr,
                     unsigned int code_size, const char *entry,
                     const char *file)
{
    unsigned long addr = (unsigned long)code_addr;

    fprintf(method_file, "%lx %x py::%s:%s\n", addr, code_size, entry, file);
}
116+
117+
static PyObject *
118+
py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
119+
int throw)
120+
{
121+
PyCodeObject *co = frame->f_code;
122+
py_trampoline f = NULL;
123+
_PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
124+
if (f == NULL) {
125+
if (extra_code_index == -1) {
126+
extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
127+
}
128+
py_trampoline new_trampoline = compile_trampoline();
129+
if (new_trampoline == NULL) {
130+
return NULL;
131+
}
132+
FILE *pfile = perf_map_open(getpid());
133+
if (pfile == NULL) {
134+
return NULL;
135+
}
136+
perf_map_write_entry(pfile, new_trampoline, code_arena.code_size,
137+
PyUnicode_AsUTF8(co->co_qualname),
138+
PyUnicode_AsUTF8(co->co_filename));
139+
perf_map_close(pfile);
140+
_PyCode_SetExtra((PyObject *)co, extra_code_index,
141+
(void *)new_trampoline);
142+
f = new_trampoline;
143+
}
144+
assert(f != NULL);
145+
return f(_PyEval_EvalFrameDefault, ts, frame, throw);
146+
}
147+
#endif
148+
149+
int
150+
_PyPerfTrampoline_Init(int activate)
151+
{
152+
PyThreadState *tstate = _PyThreadState_GET();
153+
if (!activate) {
154+
tstate->interp->eval_frame = NULL;
155+
}
156+
else {
157+
#ifdef HAVE_PERF_TRAMPOLINE
158+
tstate->interp->eval_frame = py_trampoline_evaluator;
159+
if (new_code_arena() < 0) {
160+
return -1;
161+
}
162+
#endif
163+
}
164+
return 0;
165+
}

PCbuild/_freeze_module.vcxproj

+1
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@
129129
<ClCompile Include="..\Objects\cellobject.c" />
130130
<ClCompile Include="..\Objects\classobject.c" />
131131
<ClCompile Include="..\Objects\codeobject.c" />
132+
<ClCompile Include="..\Objects\perf_trampoline.c" />
132133
<ClCompile Include="..\Objects\complexobject.c" />
133134
<ClCompile Include="..\Objects\descrobject.c" />
134135
<ClCompile Include="..\Objects\dictobject.c" />

PCbuild/_freeze_module.vcxproj.filters

+3
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@
8585
<ClCompile Include="..\Objects\codeobject.c">
8686
<Filter>Source Files</Filter>
8787
</ClCompile>
88+
<ClCompile Include="..\Objects\perf_trampoline.c">
89+
<Filter>Source Files</Filter>
90+
</ClCompile>
8891
<ClCompile Include="..\Python\compile.c">
8992
<Filter>Source Files</Filter>
9093
</ClCompile>

PCbuild/pythoncore.vcxproj

+1
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,7 @@
430430
<ClCompile Include="..\Objects\cellobject.c" />
431431
<ClCompile Include="..\Objects\classobject.c" />
432432
<ClCompile Include="..\Objects\codeobject.c" />
433+
<ClCompile Include="..\Objects\perf_trampoline.c" />
433434
<ClCompile Include="..\Objects\complexobject.c" />
434435
<ClCompile Include="..\Objects\descrobject.c" />
435436
<ClCompile Include="..\Objects\dictobject.c" />

PCbuild/pythoncore.vcxproj.filters

+3
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,9 @@
926926
<ClCompile Include="..\Objects\codeobject.c">
927927
<Filter>Objects</Filter>
928928
</ClCompile>
929+
<ClCompile Include="..\Objects\perf_trampoline.c">
930+
<Filter>Objects</Filter>
931+
</ClCompile>
929932
<ClCompile Include="..\Objects\complexobject.c">
930933
<Filter>Objects</Filter>
931934
</ClCompile>

Python/clinic/sysmodule.c.h

+37-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/initconfig.c

+24
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,7 @@ _PyConfig_InitCompatConfig(PyConfig *config)
745745
config->use_hash_seed = -1;
746746
config->faulthandler = -1;
747747
config->tracemalloc = -1;
748+
config->perf_profiling = -1;
748749
config->module_search_paths_set = 0;
749750
config->parse_argv = 0;
750751
config->site_import = -1;
@@ -829,6 +830,7 @@ PyConfig_InitIsolatedConfig(PyConfig *config)
829830
config->use_hash_seed = 0;
830831
config->faulthandler = 0;
831832
config->tracemalloc = 0;
833+
config->perf_profiling = 0;
832834
config->safe_path = 1;
833835
config->pathconfig_warnings = 0;
834836
#ifdef MS_WINDOWS
@@ -940,6 +942,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2)
940942
COPY_ATTR(_install_importlib);
941943
COPY_ATTR(faulthandler);
942944
COPY_ATTR(tracemalloc);
945+
COPY_ATTR(perf_profiling);
943946
COPY_ATTR(import_time);
944947
COPY_ATTR(code_debug_ranges);
945948
COPY_ATTR(show_ref_count);
@@ -1050,6 +1053,7 @@ _PyConfig_AsDict(const PyConfig *config)
10501053
SET_ITEM_UINT(hash_seed);
10511054
SET_ITEM_INT(faulthandler);
10521055
SET_ITEM_INT(tracemalloc);
1056+
SET_ITEM_INT(perf_profiling);
10531057
SET_ITEM_INT(import_time);
10541058
SET_ITEM_INT(code_debug_ranges);
10551059
SET_ITEM_INT(show_ref_count);
@@ -1331,6 +1335,7 @@ _PyConfig_FromDict(PyConfig *config, PyObject *dict)
13311335
CHECK_VALUE("hash_seed", config->hash_seed <= MAX_HASH_SEED);
13321336
GET_UINT(faulthandler);
13331337
GET_UINT(tracemalloc);
1338+
GET_UINT(perf_profiling);
13341339
GET_UINT(import_time);
13351340
GET_UINT(code_debug_ranges);
13361341
GET_UINT(show_ref_count);
@@ -1687,6 +1692,16 @@ config_read_env_vars(PyConfig *config)
16871692
return _PyStatus_OK();
16881693
}
16891694

1695+
static PyStatus
1696+
config_init_perf_profiling(PyConfig *config)
1697+
{
1698+
const wchar_t *xoption = config_get_xoption(config, L"perf");
1699+
if (xoption) {
1700+
config->perf_profiling = 1;
1701+
}
1702+
return _PyStatus_OK();
1703+
1704+
}
16901705

16911706
static PyStatus
16921707
config_init_tracemalloc(PyConfig *config)
@@ -1788,6 +1803,12 @@ config_read_complex_options(PyConfig *config)
17881803
return status;
17891804
}
17901805
}
1806+
/* Only derive perf_profiling from the "perf" option while it is still unset
 * (negative).  The guard must test perf_profiling itself: testing tracemalloc
 * would skip this step whenever tracemalloc had already been configured and
 * would override an explicitly preset perf_profiling value. */
if (config->perf_profiling < 0) {
    status = config_init_perf_profiling(config);
    if (_PyStatus_EXCEPTION(status)) {
        return status;
    }
}
17911812

17921813
if (config->pycache_prefix == NULL) {
17931814
status = config_init_pycache_prefix(config);
@@ -2104,6 +2125,9 @@ config_read(PyConfig *config, int compute_path_config)
21042125
if (config->tracemalloc < 0) {
21052126
config->tracemalloc = 0;
21062127
}
2128+
if (config->perf_profiling < 0) {
2129+
config->perf_profiling = 0;
2130+
}
21072131
if (config->use_hash_seed < 0) {
21082132
config->use_hash_seed = 0;
21092133
config->hash_seed = 0;

0 commit comments

Comments
 (0)