Skip to content

Commit dcd6f22

Browse files
gh-100227: Make the Global PyModuleDef Cache Safe for Isolated Interpreters (gh-103084)
Sharing mutable (or non-immortal) objects between interpreters is generally not safe. We can work around that but not easily. There are two restrictions that are critical for objects that break interpreter isolation. The first is that the object's state be guarded by a global lock. For now the GIL meets this requirement, but a granular global lock is needed once we have a per-interpreter GIL. The second restriction is that the object (and, for a container, its items) be deallocated/resized only when the interpreter in which it was allocated is the current one. This is because every interpreter has (or will have, see gh-101660) its own object allocator. Deallocating an object with a different allocator can cause crashes. The dict for the cache of module defs is completely internal, which simplifies what we have to do to meet those requirements. To do so, we do the following: * add a mechanism for re-using a temporary thread state tied to the main interpreter in an arbitrary thread * add _PyRuntime.imports.extensions.main_tstate` * add _PyThreadState_InitDetached() and _PyThreadState_ClearDetached() (pystate.c) * add _PyThreadState_BindDetached() and _PyThreadState_UnbindDetached() (pystate.c) * make sure the cache dict (_PyRuntime.imports.extensions.dict) and its items are all owned by the main interpreter) * add a placeholder using for a granular global lock Note that the cache is only used for legacy extension modules and not for multi-phase init modules. #100227
1 parent 121057a commit dcd6f22

File tree

5 files changed

+274
-59
lines changed

5 files changed

+274
-59
lines changed

Include/internal/pycore_import.h

+13-7
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,19 @@ struct _import_runtime_state {
1414
which is just about every time an extension module is imported.
1515
See PyInterpreterState.modules_by_index for more info. */
1616
Py_ssize_t last_module_index;
17-
/* A dict mapping (filename, name) to PyModuleDef for modules.
18-
Only legacy (single-phase init) extension modules are added
19-
and only if they support multiple initialization (m_size >- 0)
20-
or are imported in the main interpreter.
21-
This is initialized lazily in _PyImport_FixupExtensionObject().
22-
Modules are added there and looked up in _imp.find_extension(). */
23-
PyObject *extensions;
17+
struct {
18+
/* A thread state tied to the main interpreter,
19+
used exclusively for when the extensions dict is access/modified
20+
from an arbitrary thread. */
21+
PyThreadState main_tstate;
22+
/* A dict mapping (filename, name) to PyModuleDef for modules.
23+
Only legacy (single-phase init) extension modules are added
24+
and only if they support multiple initialization (m_size >- 0)
25+
or are imported in the main interpreter.
26+
This is initialized lazily in _PyImport_FixupExtensionObject().
27+
Modules are added there and looked up in _imp.find_extension(). */
28+
PyObject *dict;
29+
} extensions;
2430
/* Package context -- the full module name for package imports */
2531
const char * pkgcontext;
2632
};

Include/internal/pycore_pystate.h

+5
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,11 @@ PyAPI_FUNC(void) _PyThreadState_Init(
127127
PyThreadState *tstate);
128128
PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate);
129129

130+
extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *);
131+
extern void _PyThreadState_ClearDetached(PyThreadState *);
132+
extern void _PyThreadState_BindDetached(PyThreadState *);
133+
extern void _PyThreadState_UnbindDetached(PyThreadState *);
134+
130135

131136
static inline void
132137
_PyThreadState_UpdateTracingState(PyThreadState *tstate)

Include/internal/pycore_runtime_init.h

+5
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ extern PyTypeObject _PyExc_MemoryError;
4141
in accordance with the specification. */ \
4242
.autoTSSkey = Py_tss_NEEDS_INIT, \
4343
.parser = _parser_runtime_state_INIT, \
44+
.imports = { \
45+
.extensions = { \
46+
.main_tstate = _PyThreadState_INIT, \
47+
}, \
48+
}, \
4449
.ceval = { \
4550
.perf = _PyEval_RUNTIME_PERF_INIT, \
4651
}, \

Python/import.c

+145-30
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,18 @@ Generally, when multiple interpreters are involved, some of the above
862862
gets even messier.
863863
*/
864864

865+
static inline void
866+
extensions_lock_acquire(void)
867+
{
868+
// XXX For now the GIL is sufficient.
869+
}
870+
871+
static inline void
872+
extensions_lock_release(void)
873+
{
874+
// XXX For now the GIL is sufficient.
875+
}
876+
865877
/* Magic for extension modules (built-in as well as dynamically
866878
loaded). To prevent initializing an extension module more than
867879
once, we keep a static dictionary 'extensions' keyed by the tuple
@@ -878,71 +890,170 @@ gets even messier.
878890
dictionary, to avoid loading shared libraries twice.
879891
*/
880892

893+
static void
894+
_extensions_cache_init(void)
895+
{
896+
/* The runtime (i.e. main interpreter) must be initializing,
897+
so we don't need to worry about the lock. */
898+
_PyThreadState_InitDetached(&EXTENSIONS.main_tstate,
899+
_PyInterpreterState_Main());
900+
}
901+
881902
static PyModuleDef *
882903
_extensions_cache_get(PyObject *filename, PyObject *name)
883904
{
884-
PyObject *extensions = EXTENSIONS;
885-
if (extensions == NULL) {
886-
return NULL;
887-
}
905+
PyModuleDef *def = NULL;
906+
extensions_lock_acquire();
907+
888908
PyObject *key = PyTuple_Pack(2, filename, name);
889909
if (key == NULL) {
890-
return NULL;
910+
goto finally;
911+
}
912+
913+
PyObject *extensions = EXTENSIONS.dict;
914+
if (extensions == NULL) {
915+
goto finally;
891916
}
892-
PyModuleDef *def = (PyModuleDef *)PyDict_GetItemWithError(extensions, key);
893-
Py_DECREF(key);
917+
def = (PyModuleDef *)PyDict_GetItemWithError(extensions, key);
918+
919+
finally:
920+
Py_XDECREF(key);
921+
extensions_lock_release();
894922
return def;
895923
}
896924

897925
static int
898926
_extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def)
899927
{
900-
PyObject *extensions = EXTENSIONS;
928+
int res = -1;
929+
PyThreadState *oldts = NULL;
930+
extensions_lock_acquire();
931+
932+
/* Swap to the main interpreter, if necessary. This matters if
933+
the dict hasn't been created yet or if the item isn't in the
934+
dict yet. In both cases we must ensure the relevant objects
935+
are created using the main interpreter. */
936+
PyThreadState *main_tstate = &EXTENSIONS.main_tstate;
937+
PyInterpreterState *interp = _PyInterpreterState_GET();
938+
if (!_Py_IsMainInterpreter(interp)) {
939+
_PyThreadState_BindDetached(main_tstate);
940+
oldts = _PyThreadState_Swap(interp->runtime, main_tstate);
941+
assert(!_Py_IsMainInterpreter(oldts->interp));
942+
943+
/* Make sure the name and filename objects are owned
944+
by the main interpreter. */
945+
name = PyUnicode_InternFromString(PyUnicode_AsUTF8(name));
946+
assert(name != NULL);
947+
filename = PyUnicode_InternFromString(PyUnicode_AsUTF8(filename));
948+
assert(filename != NULL);
949+
}
950+
951+
PyObject *key = PyTuple_Pack(2, filename, name);
952+
if (key == NULL) {
953+
goto finally;
954+
}
955+
956+
PyObject *extensions = EXTENSIONS.dict;
901957
if (extensions == NULL) {
902958
extensions = PyDict_New();
903959
if (extensions == NULL) {
904-
return -1;
960+
goto finally;
905961
}
906-
EXTENSIONS = extensions;
962+
EXTENSIONS.dict = extensions;
907963
}
908-
PyObject *key = PyTuple_Pack(2, filename, name);
909-
if (key == NULL) {
910-
return -1;
964+
965+
PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key);
966+
if (PyErr_Occurred()) {
967+
goto finally;
968+
}
969+
else if (actual != NULL) {
970+
/* We expect it to be static, so it must be the same pointer. */
971+
assert(def == actual);
972+
res = 0;
973+
goto finally;
911974
}
912-
int res = PyDict_SetItem(extensions, key, (PyObject *)def);
913-
Py_DECREF(key);
975+
976+
/* This might trigger a resize, which is why we must switch
977+
to the main interpreter. */
978+
res = PyDict_SetItem(extensions, key, (PyObject *)def);
914979
if (res < 0) {
915-
return -1;
980+
res = -1;
981+
goto finally;
916982
}
917-
return 0;
983+
res = 0;
984+
985+
finally:
986+
if (oldts != NULL) {
987+
_PyThreadState_Swap(interp->runtime, oldts);
988+
_PyThreadState_UnbindDetached(main_tstate);
989+
Py_DECREF(name);
990+
Py_DECREF(filename);
991+
}
992+
Py_XDECREF(key);
993+
extensions_lock_release();
994+
return res;
918995
}
919996

920997
static int
921998
_extensions_cache_delete(PyObject *filename, PyObject *name)
922999
{
923-
PyObject *extensions = EXTENSIONS;
924-
if (extensions == NULL) {
925-
return 0;
926-
}
1000+
int res = -1;
1001+
PyThreadState *oldts = NULL;
1002+
extensions_lock_acquire();
1003+
9271004
PyObject *key = PyTuple_Pack(2, filename, name);
9281005
if (key == NULL) {
929-
return -1;
1006+
goto finally;
1007+
}
1008+
1009+
PyObject *extensions = EXTENSIONS.dict;
1010+
if (extensions == NULL) {
1011+
res = 0;
1012+
goto finally;
9301013
}
1014+
1015+
PyModuleDef *actual = (PyModuleDef *)PyDict_GetItemWithError(extensions, key);
1016+
if (PyErr_Occurred()) {
1017+
goto finally;
1018+
}
1019+
else if (actual == NULL) {
1020+
/* It was already removed or never added. */
1021+
res = 0;
1022+
goto finally;
1023+
}
1024+
1025+
/* Swap to the main interpreter, if necessary. */
1026+
PyThreadState *main_tstate = &EXTENSIONS.main_tstate;
1027+
PyInterpreterState *interp = _PyInterpreterState_GET();
1028+
if (!_Py_IsMainInterpreter(interp)) {
1029+
_PyThreadState_BindDetached(main_tstate);
1030+
oldts = _PyThreadState_Swap(interp->runtime, main_tstate);
1031+
assert(!_Py_IsMainInterpreter(oldts->interp));
1032+
}
1033+
9311034
if (PyDict_DelItem(extensions, key) < 0) {
932-
if (!PyErr_ExceptionMatches(PyExc_KeyError)) {
933-
Py_DECREF(key);
934-
return -1;
935-
}
936-
PyErr_Clear();
1035+
goto finally;
9371036
}
938-
Py_DECREF(key);
939-
return 0;
1037+
res = 0;
1038+
1039+
finally:
1040+
if (oldts != NULL) {
1041+
_PyThreadState_Swap(interp->runtime, oldts);
1042+
_PyThreadState_UnbindDetached(main_tstate);
1043+
}
1044+
Py_XDECREF(key);
1045+
extensions_lock_release();
1046+
return res;
9401047
}
9411048

9421049
static void
9431050
_extensions_cache_clear_all(void)
9441051
{
945-
Py_CLEAR(EXTENSIONS);
1052+
/* The runtime (i.e. main interpreter) must be finalizing,
1053+
so we don't need to worry about the lock. */
1054+
// XXX assert(_Py_IsMainInterpreter(_PyInterpreterState_GET()));
1055+
Py_CLEAR(EXTENSIONS.dict);
1056+
_PyThreadState_ClearDetached(&EXTENSIONS.main_tstate);
9461057
}
9471058

9481059

@@ -2941,6 +3052,10 @@ _PyImport_Fini2(void)
29413052
PyStatus
29423053
_PyImport_InitCore(PyThreadState *tstate, PyObject *sysmod, int importlib)
29433054
{
3055+
if (_Py_IsMainInterpreter(tstate->interp)) {
3056+
_extensions_cache_init();
3057+
}
3058+
29443059
// XXX Initialize here: interp->modules and interp->import_func.
29453060
// XXX Initialize here: sys.modules and sys.meta_path.
29463061

0 commit comments

Comments
 (0)