Skip to content

Commit be0c106

Browse files
gsallamczardozgpsheadcarljm
authored
gh-103295: expose API for writing perf map files (#103546)
Co-authored-by: Aniket Panse <[email protected]> Co-authored-by: Gregory P. Smith <[email protected]> Co-authored-by: Carl Meyer <[email protected]>
1 parent 2e91c7e commit be0c106

File tree

11 files changed

+213
-72
lines changed

11 files changed

+213
-72
lines changed

Doc/c-api/perfmaps.rst

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
.. highlight:: c
2+
3+
.. _perfmaps:
4+
5+
Support for Perf Maps
6+
----------------------
7+
8+
On supported platforms (as of this writing, only Linux), the runtime can take
9+
advantage of *perf map files* to make Python functions visible to an external
10+
profiling tool (such as `perf <https://perf.wiki.kernel.org/index.php/Main_Page>`_).
11+
A running process may create a file in the ``/tmp`` directory, which contains entries
12+
that can map a section of executable code to a name. This interface is described in the
13+
`documentation of the Linux Perf tool <https://git.kernel.org/pub/scm/linux/
14+
kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt>`_.
15+
16+
In Python, these helper APIs can be used by libraries and features that rely
17+
on generating machine code on the fly.
18+
19+
Note that holding the Global Interpreter Lock (GIL) is not required for these APIs.
20+
21+
.. c:function:: int PyUnstable_PerfMapState_Init(void)
22+
23+
Open the ``/tmp/perf-$pid.map`` file, unless it's already opened, and create
24+
a lock to ensure thread-safe writes to the file (provided the writes are
25+
done through :c:func:`PyUnstable_WritePerfMapEntry`). Normally, there's no need
26+
to call this explicitly; just use :c:func:`PyUnstable_WritePerfMapEntry`
27+
and it will initialize the state on first call.
28+
29+
Returns ``0`` on success, ``-1`` on failure to create/open the perf map file,
30+
or ``-2`` on failure to create a lock. Check ``errno`` for more information
31+
about the cause of a failure.
32+
33+
.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name)
34+
35+
Write one single entry to the ``/tmp/perf-$pid.map`` file. This function is
36+
thread-safe. Here is what an example entry looks like::
37+
38+
# address size name
39+
7f3529fcf759 b py::bar:/run/t.py
40+
41+
This function calls :c:func:`PyUnstable_PerfMapState_Init` before writing the entry, if
42+
the perf map file is not already opened. Returns ``0`` on success, or the
43+
same error codes as :c:func:`PyUnstable_PerfMapState_Init` on failure.
44+
45+
.. c:function:: void PyUnstable_PerfMapState_Fini(void)
46+
47+
Close the perf map file opened by :c:func:`PyUnstable_PerfMapState_Init`.
48+
This is called by the runtime itself during interpreter shutdown. In
49+
general, there shouldn't be a reason to explicitly call this, except to
50+
handle specific scenarios such as forking.

Doc/c-api/utilities.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ and parsing function arguments and constructing Python values from C values.
1919
conversion.rst
2020
reflection.rst
2121
codec.rst
22+
perfmaps.rst

Doc/howto/perf_profiling.rst

+1-3
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ functions to appear in the output of the ``perf`` profiler. When this mode is
2424
enabled, the interpreter will interpose a small piece of code compiled on the
2525
fly before the execution of every Python function and it will teach ``perf`` the
2626
relationship between this piece of code and the associated Python function using
27-
`perf map files`_.
27+
:doc:`perf map files <../c-api/perfmaps>`.
2828

2929
.. note::
3030

@@ -206,5 +206,3 @@ You can check if your system has been compiled with this flag by running::
206206
If you don't see any output it means that your interpreter has not been compiled with
207207
frame pointers and therefore it may not be able to show Python functions in the output
208208
of ``perf``.
209-
210-
.. _perf map files: https://github.com/torvalds/linux/blob/0513e464f9007b70b96740271a948ca5ab6e7dd7/tools/perf/Documentation/jit-interface.txt

Include/sysmodule.h

+13
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,19 @@ Py_DEPRECATED(3.11) PyAPI_FUNC(int) PySys_HasWarnOptions(void);
2929
Py_DEPRECATED(3.11) PyAPI_FUNC(void) PySys_AddXOption(const wchar_t *);
3030
PyAPI_FUNC(PyObject *) PySys_GetXOptions(void);
3131

32+
#if !defined(Py_LIMITED_API)
33+
typedef struct {
34+
FILE* perf_map;
35+
PyThread_type_lock map_lock;
36+
} PerfMapState;
37+
38+
PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void);
39+
40+
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name);
41+
42+
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
43+
#endif
44+
3245
#ifndef Py_LIMITED_API
3346
# define Py_CPYTHON_SYSMODULE_H
3447
# include "cpython/sysmodule.h"

Lib/test/test_perfmaps.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import os
2+
import sys
3+
import unittest
4+
5+
from _testinternalcapi import perf_map_state_teardown, write_perf_map_entry
6+
7+
if sys.platform != 'linux':
8+
raise unittest.SkipTest('Linux only')
9+
10+
11+
class TestPerfMapWriting(unittest.TestCase):
    """Check that perf map entries written through the C API reach the file."""

    def test_write_perf_map_entry(self):
        """Write two entries and verify both appear in /tmp/perf-<pid>.map."""
        # Register the teardown up front so the perf map state is closed
        # even when one of the assertions below fails.
        self.addCleanup(perf_map_state_teardown)

        self.assertEqual(write_perf_map_entry(0x1234, 5678, "entry1"), 0)
        self.assertEqual(write_perf_map_entry(0x2345, 6789, "entry2"), 0)
        with open(f"/tmp/perf-{os.getpid()}.map") as f:
            perf_file_contents = f.read()
        # Address and size are both written in hexadecimal:
        # 5678 == 0x162e and 6789 == 0x1a85.
        self.assertIn("1234 162e entry1", perf_file_contents)
        self.assertIn("2345 1a85 entry2", perf_file_contents)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Introduced :c:func:`PyUnstable_WritePerfMapEntry`, :c:func:`PyUnstable_PerfMapState_Init` and
2+
:c:func:`PyUnstable_PerfMapState_Fini`. These allow extension modules (JIT compilers in
3+
particular) to write to perf-map files in a thread-safe manner. The
4+
:doc:`../howto/perf_profiling` also uses these APIs to write
5+
entries in the perf-map file.

Modules/_testinternalcapi.c

+27
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,31 @@ clear_extension(PyObject *self, PyObject *args)
759759
Py_RETURN_NONE;
760760
}
761761

762+
/* Test helper: write_perf_map_entry(code_addr, code_size, entry_name) -> int
 *
 * Forwards its arguments to PyUnstable_WritePerfMapEntry() and returns that
 * function's status code as a Python int.  Raises OSError when the status
 * is -1; any other status is returned to the caller unchanged.
 */
static PyObject *
write_perf_map_entry(PyObject *self, PyObject *args)
{
    PyObject *code_addr_obj;
    unsigned int code_size;
    const char *entry_name;

    /* The address is parsed as a generic object rather than with "K":
       "K" stores an unsigned long long, which would overrun a pointer-sized
       variable on platforms where pointers are narrower than 64 bits. */
    if (!PyArg_ParseTuple(args, "OIs", &code_addr_obj, &code_size, &entry_name)) {
        return NULL;
    }

    const void *code_addr = PyLong_AsVoidPtr(code_addr_obj);
    /* NULL is also the legitimate result for address 0, so consult the
       error indicator to tell the two cases apart. */
    if (code_addr == NULL && PyErr_Occurred()) {
        return NULL;
    }

    int ret = PyUnstable_WritePerfMapEntry(code_addr, code_size, entry_name);
    if (ret == -1) {
        PyErr_SetString(PyExc_OSError, "Failed to write performance map entry");
        return NULL;
    }
    return Py_BuildValue("i", ret);
}
779+
780+
/* Test helper: perf_map_state_teardown() -> None
 *
 * Calls PyUnstable_PerfMapState_Fini() so a test can release the perf map
 * state it caused to be created.  Takes no arguments.
 */
static PyObject *
perf_map_state_teardown(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
{
    PyUnstable_PerfMapState_Fini();
    return Py_NewRef(Py_None);
}
786+
762787
static PyObject *
763788
iframe_getcode(PyObject *self, PyObject *frame)
764789
{
@@ -815,6 +840,8 @@ static PyMethodDef module_functions[] = {
815840
_TESTINTERNALCAPI_ASSEMBLE_CODE_OBJECT_METHODDEF
816841
{"get_interp_settings", get_interp_settings, METH_VARARGS, NULL},
817842
{"clear_extension", clear_extension, METH_VARARGS, NULL},
843+
{"write_perf_map_entry", write_perf_map_entry, METH_VARARGS},
844+
{"perf_map_state_teardown", perf_map_state_teardown, METH_NOARGS},
818845
{"iframe_getcode", iframe_getcode, METH_O, NULL},
819846
{"iframe_getline", iframe_getline, METH_O, NULL},
820847
{"iframe_getlasti", iframe_getlasti, METH_O, NULL},

Python/perf_trampoline.c

+16-68
Original file line numberDiff line numberDiff line change
@@ -193,75 +193,33 @@ typedef struct trampoline_api_st trampoline_api_t;
193193
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
194194
#define perf_map_file _PyRuntime.ceval.perf.map_file
195195

196-
static void *
197-
perf_map_get_file(void)
198-
{
199-
if (perf_map_file) {
200-
return perf_map_file;
201-
}
202-
char filename[100];
203-
pid_t pid = getpid();
204-
// Location and file name of perf map is hard-coded in perf tool.
205-
// Use exclusive create flag wit nofollow to prevent symlink attacks.
206-
int flags = O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW | O_CLOEXEC;
207-
snprintf(filename, sizeof(filename) - 1, "/tmp/perf-%jd.map",
208-
(intmax_t)pid);
209-
int fd = open(filename, flags, 0600);
210-
if (fd == -1) {
211-
perf_status = PERF_STATUS_FAILED;
212-
PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
213-
return NULL;
214-
}
215-
perf_map_file = fdopen(fd, "w");
216-
if (!perf_map_file) {
217-
perf_status = PERF_STATUS_FAILED;
218-
PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
219-
close(fd);
220-
return NULL;
221-
}
222-
return perf_map_file;
223-
}
224-
225-
static int
226-
perf_map_close(void *state)
227-
{
228-
FILE *fp = (FILE *)state;
229-
int ret = 0;
230-
if (fp) {
231-
ret = fclose(fp);
232-
}
233-
perf_map_file = NULL;
234-
perf_status = PERF_STATUS_NO_INIT;
235-
return ret;
236-
}
237196

238197
static void
239198
perf_map_write_entry(void *state, const void *code_addr,
240199
unsigned int code_size, PyCodeObject *co)
241200
{
242-
assert(state != NULL);
243-
FILE *method_file = (FILE *)state;
244-
const char *entry = PyUnicode_AsUTF8(co->co_qualname);
245-
if (entry == NULL) {
246-
_PyErr_WriteUnraisableMsg("Failed to get qualname from code object",
247-
NULL);
248-
return;
201+
const char *entry = "";
202+
if (co->co_qualname != NULL) {
203+
entry = PyUnicode_AsUTF8(co->co_qualname);
249204
}
250-
const char *filename = PyUnicode_AsUTF8(co->co_filename);
251-
if (filename == NULL) {
252-
_PyErr_WriteUnraisableMsg("Failed to get filename from code object",
253-
NULL);
205+
const char *filename = "";
206+
if (co->co_filename != NULL) {
207+
filename = PyUnicode_AsUTF8(co->co_filename);
208+
}
209+
size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1;
210+
char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size);
211+
if (perf_map_entry == NULL) {
254212
return;
255213
}
256-
fprintf(method_file, "%" PRIxPTR " %x py::%s:%s\n", (uintptr_t) code_addr, code_size, entry,
257-
filename);
258-
fflush(method_file);
214+
snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename);
215+
PyUnstable_WritePerfMapEntry(code_addr, code_size, perf_map_entry);
216+
PyMem_RawFree(perf_map_entry);
259217
}
260218

261219
_PyPerf_Callbacks _Py_perfmap_callbacks = {
262-
&perf_map_get_file,
220+
NULL,
263221
&perf_map_write_entry,
264-
&perf_map_close
222+
NULL,
265223
};
266224

267225
static int
@@ -465,13 +423,6 @@ _PyPerfTrampoline_Init(int activate)
465423
if (new_code_arena() < 0) {
466424
return -1;
467425
}
468-
if (trampoline_api.state == NULL) {
469-
void *state = trampoline_api.init_state();
470-
if (state == NULL) {
471-
return -1;
472-
}
473-
trampoline_api.state = state;
474-
}
475426
extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
476427
if (extra_code_index == -1) {
477428
return -1;
@@ -491,10 +442,6 @@ _PyPerfTrampoline_Fini(void)
491442
tstate->interp->eval_frame = NULL;
492443
}
493444
free_code_arenas();
494-
if (trampoline_api.state != NULL) {
495-
trampoline_api.free_state(trampoline_api.state);
496-
trampoline_api.state = NULL;
497-
}
498445
extra_code_index = -1;
499446
#endif
500447
return 0;
@@ -507,6 +454,7 @@ _PyPerfTrampoline_AfterFork_Child(void)
507454
// Restart trampoline in file in child.
508455
int was_active = _PyIsPerfTrampolineActive();
509456
_PyPerfTrampoline_Fini();
457+
PyUnstable_PerfMapState_Fini();
510458
if (was_active) {
511459
_PyPerfTrampoline_Init(1);
512460
}

Python/pylifecycle.c

+1
Original file line numberDiff line numberDiff line change
@@ -1775,6 +1775,7 @@ Py_FinalizeEx(void)
17751775
*/
17761776

17771777
_PyAtExit_Call(tstate->interp);
1778+
PyUnstable_PerfMapState_Fini();
17781779

17791780
/* Copy the core config, PyInterpreterState_Delete() free
17801781
the core config memory */

Python/sysmodule.c

+79-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ extern const char *PyWin_DLLVersionString;
5252
#include <emscripten.h>
5353
#endif
5454

55+
#ifdef HAVE_FCNTL_H
56+
#include <fcntl.h>
57+
#endif
58+
5559
/*[clinic input]
5660
module sys
5761
[clinic start generated code]*/
@@ -2144,7 +2148,7 @@ sys_activate_stack_trampoline_impl(PyObject *module, const char *backend)
21442148
if (strcmp(backend, "perf") == 0) {
21452149
_PyPerf_Callbacks cur_cb;
21462150
_PyPerfTrampoline_GetCallbacks(&cur_cb);
2147-
if (cur_cb.init_state != _Py_perfmap_callbacks.init_state) {
2151+
if (cur_cb.write_state != _Py_perfmap_callbacks.write_state) {
21482152
if (_PyPerfTrampoline_SetCallbacks(&_Py_perfmap_callbacks) < 0 ) {
21492153
PyErr_SetString(PyExc_ValueError, "can't activate perf trampoline");
21502154
return NULL;
@@ -2240,6 +2244,80 @@ sys__getframemodulename_impl(PyObject *module, int depth)
22402244
}
22412245

22422246

2247+
#ifdef __cplusplus
2248+
extern "C" {
2249+
#endif
2250+
2251+
static PerfMapState perf_map_state;
2252+
2253+
/* Open /tmp/perf-<pid>.map for appending and create the lock that guards
 * writes to it.
 *
 * Does nothing when the file is already open.  Returns 0 on success, -1 if
 * the file cannot be created/opened, or -2 if the lock cannot be created.
 * On failure perf_map_state is left untouched, so a later call can retry.
 * On Windows this is a no-op that returns 0.
 */
PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void) {
#ifndef MS_WINDOWS
    if (perf_map_state.perf_map != NULL) {
        // Already initialized: reopening would leak the stream and the lock.
        return 0;
    }

    char filename[100];
    pid_t pid = getpid();
    // Use nofollow flag to prevent symlink attacks.
    int flags = O_WRONLY | O_CREAT | O_APPEND | O_NOFOLLOW | O_CLOEXEC;
    snprintf(filename, sizeof(filename), "/tmp/perf-%jd.map",
             (intmax_t)pid);
    int fd = open(filename, flags, 0600);
    if (fd == -1) {
        return -1;
    }

    FILE *fp = fdopen(fd, "a");
    if (fp == NULL) {
        close(fd);
        return -1;
    }

    PyThread_type_lock lock = PyThread_allocate_lock();
    if (lock == NULL) {
        // The stream is closed before anything is published, so the state
        // never holds a pointer to a closed FILE.
        fclose(fp);
        return -2;
    }

    // Publish only once both resources exist; the lock is stored first so
    // that a non-NULL perf_map always comes with a valid lock.
    perf_map_state.map_lock = lock;
    perf_map_state.perf_map = fp;
#endif
    return 0;
}
2280+
2281+
/* Append one line of the form "<hex address> <hex size> <name>" to the perf
 * map file, flushing it immediately.
 *
 * When the file is not open yet, PyUnstable_PerfMapState_Init() is called
 * first and its non-zero status is returned on failure.  The write and the
 * flush happen while holding perf_map_state.map_lock.  Returns 0 otherwise;
 * on Windows this is a no-op that returns 0.
 */
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
    const void *code_addr,
    unsigned int code_size,
    const char *entry_name
) {
#ifndef MS_WINDOWS
    if (perf_map_state.perf_map == NULL) {
        int init_status = PyUnstable_PerfMapState_Init();
        if (init_status != 0) {
            return init_status;
        }
    }

    const uintptr_t start_addr = (uintptr_t) code_addr;

    PyThread_acquire_lock(perf_map_state.map_lock, 1);
    FILE *map_file = perf_map_state.perf_map;
    fprintf(map_file, "%" PRIxPTR " %x %s\n", start_addr, code_size, entry_name);
    fflush(map_file);
    PyThread_release_lock(perf_map_state.map_lock);
#endif
    return 0;
}
2300+
2301+
/* Close the perf map file and free the lock that guards it.
 *
 * Does nothing when the file is not open.  Afterwards
 * perf_map_state.perf_map is NULL.  On Windows this is a no-op.
 */
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void) {
#ifndef MS_WINDOWS
    if (perf_map_state.perf_map == NULL) {
        return;
    }

    // Close the stream while holding the lock.
    PyThread_acquire_lock(perf_map_state.map_lock, 1);
    fclose(perf_map_state.perf_map);
    PyThread_release_lock(perf_map_state.map_lock);

    // Release the lock itself, then mark the state as closed.
    PyThread_free_lock(perf_map_state.map_lock);
    perf_map_state.perf_map = NULL;
#endif
}
2315+
2316+
#ifdef __cplusplus
2317+
}
2318+
#endif
2319+
2320+
22432321
static PyMethodDef sys_methods[] = {
22442322
/* Might as well keep this in alphabetic order */
22452323
SYS_ADDAUDITHOOK_METHODDEF

Tools/c-analyzer/cpython/ignored.tsv

+1
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ Python/pystate.c - initial -
356356
Python/specialize.c - adaptive_opcodes -
357357
Python/specialize.c - cache_requirements -
358358
Python/stdlib_module_names.h - _Py_stdlib_module_names -
359+
Python/sysmodule.c - perf_map_state -
359360
Python/sysmodule.c - _PySys_ImplCacheTag -
360361
Python/sysmodule.c - _PySys_ImplName -
361362
Python/sysmodule.c - whatstrings -

0 commit comments

Comments
 (0)