Skip to content

Commit ed57b36

Browse files
bpo-45953: Statically allocate the main interpreter (and initial thread state). (gh-29883)
Previously, the main interpreter was allocated on the heap during runtime initialization. Here we instead embed it into _PyRuntimeState, which means it is statically allocated as part of the _PyRuntime global. The same goes for the initial thread state (of each interpreter, including the main one). Consequently there are fewer allocations during runtime/interpreter init, fewer possible failures, and better memory locality. FYI, this also helps efforts to consolidate globals, which in turn helps work on subinterpreter isolation. https://bugs.python.org/issue45953
1 parent 0bbf30e commit ed57b36

File tree

8 files changed

+115
-34
lines changed

8 files changed

+115
-34
lines changed

Include/cpython/pystate.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
# error "this header file must not be included directly"
33
#endif
44

5+
#include <stdbool.h>
6+
7+
58
PyAPI_FUNC(int) _PyInterpreterState_RequiresIDRef(PyInterpreterState *);
69
PyAPI_FUNC(void) _PyInterpreterState_RequireIDRef(PyInterpreterState *, int);
710

@@ -83,6 +86,9 @@ struct _ts {
8386
after allocation. */
8487
int _initialized;
8588

89+
/* Was this thread state statically allocated? */
90+
bool _static;
91+
8692
int recursion_remaining;
8793
int recursion_limit;
8894
int recursion_headroom; /* Allow 50 more calls to handle any errors. */
@@ -175,9 +181,11 @@ struct _ts {
175181
PyObject **datastack_top;
176182
PyObject **datastack_limit;
177183
/* XXX signal handlers should also be here */
178-
179184
};
180185

186+
187+
/* other API */
188+
181189
// Alias for backward compatibility with Python 3.8
182190
#define _PyInterpreterState_Get PyInterpreterState_Get
183191

Include/internal/pycore_global_objects.h

-4
Original file line numberDiff line numberDiff line change
@@ -606,10 +606,6 @@ struct _Py_global_objects {
606606
}, \
607607
}
608608

609-
static inline void
610-
_Py_global_objects_reset(struct _Py_global_objects *objects)
611-
{
612-
}
613609

614610
#ifdef __cplusplus
615611
}

Include/internal/pycore_interp.h

+29-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ extern "C" {
88
# error "this header requires Py_BUILD_CORE define"
99
#endif
1010

11+
#include <stdbool.h>
12+
1113
#include "pycore_atomic.h" // _Py_atomic_address
1214
#include "pycore_ast_state.h" // struct ast_state
1315
#include "pycore_context.h" // struct _Py_context_state
@@ -70,13 +72,18 @@ struct atexit_state {
7072

7173
/* interpreter state */
7274

73-
// The PyInterpreterState typedef is in Include/pystate.h.
75+
/* PyInterpreterState holds the global state for one of the runtime's
76+
interpreters. Typically the initial (main) interpreter is the only one.
77+
78+
The PyInterpreterState typedef is in Include/pystate.h.
79+
*/
7480
struct _is {
7581

7682
struct _is *next;
7783

7884
struct pythreads {
7985
uint64_t next_unique_id;
86+
/* The linked list of threads, newest first. */
8087
struct _ts *head;
8188
/* Used in Modules/_threadmodule.c. */
8289
long count;
@@ -104,6 +111,9 @@ struct _is {
104111
int _initialized;
105112
int finalizing;
106113

114+
/* Was this interpreter statically allocated? */
115+
bool _static;
116+
107117
struct _ceval_state ceval;
108118
struct _gc_runtime_state gc;
109119

@@ -166,8 +176,26 @@ struct _is {
166176

167177
struct ast_state ast;
168178
struct type_cache type_cache;
179+
180+
/* The following fields are here to avoid allocation during init.
181+
The data is exposed through PyInterpreterState pointer fields.
182+
These fields should not be accessed directly outside of init.
183+
184+
All other PyInterpreterState pointer fields are populated when
185+
needed and default to NULL.
186+
187+
For now there are some exceptions to that rule, which require
188+
allocation during init. These will be addressed on a case-by-case
189+
basis. Also see _PyRuntimeState regarding the various mutex fields.
190+
*/
191+
192+
/* the initial PyInterpreterState.threads.head */
193+
struct _ts _initial_thread;
169194
};
170195

196+
197+
/* other API */
198+
171199
extern void _PyInterpreterState_ClearModules(PyInterpreterState *interp);
172200
extern void _PyInterpreterState_Clear(PyThreadState *tstate);
173201

Include/internal/pycore_runtime.h

+38-10
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ extern "C" {
1111
#include "pycore_atomic.h" /* _Py_atomic_address */
1212
#include "pycore_gil.h" // struct _gil_runtime_state
1313
#include "pycore_global_objects.h" // struct _Py_global_objects
14+
#include "pycore_interp.h" // struct _is
1415
#include "pycore_unicodeobject.h" // struct _Py_unicode_runtime_ids
1516

17+
1618
/* ceval state */
1719

1820
struct _ceval_runtime_state {
@@ -53,6 +55,9 @@ typedef struct _Py_AuditHookEntry {
5355

5456
/* Full Python runtime state */
5557

58+
/* _PyRuntimeState holds the global state for the CPython runtime.
59+
That data is exposed in the internal API as a static variable (_PyRuntime).
60+
*/
5661
typedef struct pyruntimestate {
5762
/* Has been initialized to a safe state.
5863
@@ -81,7 +86,11 @@ typedef struct pyruntimestate {
8186

8287
struct pyinterpreters {
8388
PyThread_type_lock mutex;
89+
/* The linked list of interpreters, newest first. */
8490
PyInterpreterState *head;
91+
/* The runtime's initial interpreter, which has a special role
92+
in the operation of the runtime. It is also often the only
93+
interpreter. */
8594
PyInterpreterState *main;
8695
/* _next_interp_id is an auto-numbered sequence of small
8796
integers. It gets initialized in _PyInterpreterState_Init(),
@@ -118,25 +127,44 @@ typedef struct pyruntimestate {
118127

119128
struct _Py_unicode_runtime_ids unicode_ids;
120129

130+
/* All the objects that are shared by the runtime's interpreters. */
121131
struct _Py_global_objects global_objects;
122-
// If anything gets added after global_objects then
123-
// _PyRuntimeState_reset() needs to get updated to clear it.
132+
133+
/* The following fields are here to avoid allocation during init.
134+
The data is exposed through _PyRuntimeState pointer fields.
135+
These fields should not be accessed directly outside of init.
136+
137+
All other _PyRuntimeState pointer fields are populated when
138+
needed and default to NULL.
139+
140+
For now there are some exceptions to that rule, which require
141+
allocation during init. These will be addressed on a case-by-case
142+
basis. Most notably, we don't pre-allocate the several mutex
143+
(PyThread_type_lock) fields, because on Windows we only ever get
144+
a pointer type.
145+
*/
146+
147+
/* _PyRuntimeState.interpreters.main */
148+
PyInterpreterState _main_interpreter;
124149
} _PyRuntimeState;
125150

151+
#define _PyThreadState_INIT \
152+
{ \
153+
._static = 1, \
154+
}
155+
#define _PyInterpreterState_INIT \
156+
{ \
157+
._static = 1, \
158+
._initial_thread = _PyThreadState_INIT, \
159+
}
126160
#define _PyRuntimeState_INIT \
127161
{ \
128162
.global_objects = _Py_global_objects_INIT, \
163+
._main_interpreter = _PyInterpreterState_INIT, \
129164
}
130-
/* Note: _PyRuntimeState_INIT sets other fields to 0/NULL */
131165

132-
static inline void
133-
_PyRuntimeState_reset(_PyRuntimeState *runtime)
134-
{
135-
/* Make it match _PyRuntimeState_INIT. */
136-
memset(runtime, 0, (size_t)&runtime->global_objects - (size_t)runtime);
137-
_Py_global_objects_reset(&runtime->global_objects);
138-
}
139166

167+
/* other API */
140168

141169
PyAPI_DATA(_PyRuntimeState) _PyRuntime;
142170

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
The main interpreter in _PyRuntimeState.interpreters is now statically
2+
allocated (as part of _PyRuntime). Likewise for the initial thread state of
3+
each interpreter. This means less allocation during runtime init, as well
4+
as better memory locality for these key state objects.

Modules/signalmodule.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ trip_signal(int sig_num)
292292
_Py_atomic_store(&is_tripped, 1);
293293

294294
/* Signals are always handled by the main interpreter */
295-
PyInterpreterState *interp = _PyRuntime.interpreters.main;
295+
PyInterpreterState *interp = _PyInterpreterState_Main();
296296

297297
/* Notify ceval.c */
298298
_PyEval_SignalReceived(interp);

Python/ceval.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ Py_AddPendingCall(int (*func)(void *), void *arg)
617617
}
618618
else {
619619
/* Last resort: use the main interpreter */
620-
interp = _PyRuntime.interpreters.main;
620+
interp = _PyInterpreterState_Main();
621621
}
622622
return _PyEval_AddPendingCall(interp, func, arg);
623623
}

Python/pystate.c

+33-16
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ static PyThreadState *_PyGILState_GetThisThreadState(struct _gilstate_runtime_st
4646
static void _PyThreadState_Delete(PyThreadState *tstate, int check_current);
4747

4848

49+
/* We use "initial" if the runtime gets re-used
50+
(e.g. Py_Finalize() followed by Py_Initialize()). */
51+
static const _PyRuntimeState initial = _PyRuntimeState_INIT;
52+
4953
static int
5054
alloc_for_runtime(PyThread_type_lock *plock1, PyThread_type_lock *plock2,
5155
PyThread_type_lock *plock3)
@@ -91,9 +95,12 @@ init_runtime(_PyRuntimeState *runtime,
9195
PyThread_type_lock xidregistry_mutex)
9296
{
9397
if (runtime->_initialized) {
94-
_PyRuntimeState_reset(runtime);
95-
assert(!runtime->initialized);
98+
Py_FatalError("runtime already initialized");
9699
}
100+
assert(!runtime->preinitializing &&
101+
!runtime->preinitialized &&
102+
!runtime->core_initialized &&
103+
!runtime->initialized);
97104

98105
runtime->open_code_hook = open_code_hook;
99106
runtime->open_code_userdata = open_code_userdata;
@@ -144,6 +151,11 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
144151
return _PyStatus_NO_MEMORY();
145152
}
146153

154+
if (runtime->_initialized) {
155+
// Py_Initialize() must be running again.
156+
// Reset to _PyRuntimeState_INIT.
157+
memcpy(runtime, &initial, sizeof(*runtime));
158+
}
147159
init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head,
148160
unicode_next_index, lock1, lock2, lock3);
149161

@@ -250,13 +262,15 @@ alloc_interpreter(void)
250262
static void
251263
free_interpreter(PyInterpreterState *interp)
252264
{
253-
PyMem_RawFree(interp);
265+
if (!interp->_static) {
266+
PyMem_RawFree(interp);
267+
}
254268
}
255269

256270
/* Get the interpreter state to a minimal consistent state.
257271
Further init happens in pylifecycle.c before it can be used.
258272
All fields not initialized here are expected to be zeroed out,
259-
e.g. by PyMem_RawCalloc() or memset().
273+
e.g. by PyMem_RawCalloc() or memset(), or otherwise pre-initialized.
260274
The runtime state is not manipulated. Instead it is assumed that
261275
the interpreter is getting added to the runtime.
262276
*/
@@ -338,23 +352,23 @@ PyInterpreterState_New(void)
338352
assert(interpreters->main == NULL);
339353
assert(id == 0);
340354

341-
interp = alloc_interpreter();
342-
if (interp == NULL) {
343-
goto error;
344-
}
355+
interp = &runtime->_main_interpreter;
345356
assert(interp->id == 0);
346357
assert(interp->next == NULL);
347358

348359
interpreters->main = interp;
349360
}
350361
else {
351-
assert(id != 0);
352362
assert(interpreters->main != NULL);
363+
assert(id != 0);
353364

354365
interp = alloc_interpreter();
355366
if (interp == NULL) {
356367
goto error;
357368
}
369+
// Set to _PyInterpreterState_INIT.
370+
memcpy(interp, &initial._main_interpreter,
371+
sizeof(*interp));
358372

359373
if (id < 0) {
360374
/* overflow or Py_Initialize() not called yet! */
@@ -735,13 +749,15 @@ alloc_threadstate(void)
735749
static void
736750
free_threadstate(PyThreadState *tstate)
737751
{
738-
PyMem_RawFree(tstate);
752+
if (!tstate->_static) {
753+
PyMem_RawFree(tstate);
754+
}
739755
}
740756

741757
/* Get the thread state to a minimal consistent state.
742758
Further init happens in pylifecycle.c before it can be used.
743759
All fields not initialized here are expected to be zeroed out,
744-
e.g. by PyMem_RawCalloc() or memset().
760+
e.g. by PyMem_RawCalloc() or memset(), or otherwise pre-initialized.
745761
The interpreter state is not manipulated. Instead it is assumed that
746762
the thread is getting added to the interpreter.
747763
*/
@@ -808,10 +824,7 @@ new_threadstate(PyInterpreterState *interp)
808824
// It's the interpreter's initial thread state.
809825
assert(id == 1);
810826

811-
tstate = alloc_threadstate();
812-
if (tstate == NULL) {
813-
goto error;
814-
}
827+
tstate = &interp->_initial_thread;
815828
}
816829
else {
817830
// Every valid interpreter must have at least one thread.
@@ -822,6 +835,10 @@ new_threadstate(PyInterpreterState *interp)
822835
if (tstate == NULL) {
823836
goto error;
824837
}
838+
// Set to _PyThreadState_INIT.
839+
memcpy(tstate,
840+
&initial._main_interpreter._initial_thread,
841+
sizeof(*tstate));
825842
}
826843
interp->threads.head = tstate;
827844

@@ -1159,7 +1176,7 @@ _PyThreadState_DeleteExcept(_PyRuntimeState *runtime, PyThreadState *tstate)
11591176
for (p = list; p; p = next) {
11601177
next = p->next;
11611178
PyThreadState_Clear(p);
1162-
PyMem_RawFree(p);
1179+
free_threadstate(p);
11631180
}
11641181
}
11651182

0 commit comments

Comments
 (0)