From 1ec055b4284218685a89f9249d8f47d660813160 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 23 Jan 2025 19:22:04 +0000 Subject: [PATCH 1/5] gh-129236: Use `stackpointer` in free threaded GC The stack pointers in interpreter frames are nearly always valid now, so use them when visiting each thread's frame. For now, don't collect objects with deferred references in the rare case that we see a frame with a NULL stack pointer. --- Include/internal/pycore_frame.h | 7 --- Python/gc_free_threading.c | 78 +++++++++++++++++++++------------ 2 files changed, 50 insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 14dc91e54298ce..3d566a8ad1b7f6 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -159,13 +159,6 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame * // Don't leave a dangling pointer to the old frame when creating generators // and coroutines: dest->previous = NULL; - -#ifdef Py_GIL_DISABLED - PyCodeObject *co = _PyFrame_GetCode(dest); - for (int i = stacktop; i < co->co_nlocalsplus + co->co_stacksize; i++) { - dest->localsplus[i] = PyStackRef_NULL; - } -#endif } #ifdef Py_GIL_DISABLED diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index d1023d9351086f..54d37b0ade3481 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -67,6 +67,10 @@ struct collection_state { PyInterpreterState *interp; GCState *gcstate; _PyGC_Reason reason; + // GH-129236: If we see a an active frame without a valid stack pointer, + // we can't collect objects with deferred references because we may not + // see all references. + int skip_deferred_objects; Py_ssize_t collected; Py_ssize_t uncollectable; Py_ssize_t long_lived_total; @@ -413,9 +417,6 @@ gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor, static inline void gc_visit_stackref(_PyStackRef stackref) { - // Note: we MUST check that it is deferred before checking the rest. - // Otherwise we might read into invalid memory due to non-deferred references - // being dead already. if (PyStackRef_IsDeferred(stackref) && !PyStackRef_IsNull(stackref)) { PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref); if (_PyObject_GC_IS_TRACKED(obj) && !gc_is_frozen(obj)) { @@ -426,20 +427,26 @@ gc_visit_stackref(_PyStackRef stackref) // Add 1 to the gc_refs for every deferred reference on each thread's stack. static void -gc_visit_thread_stacks(PyInterpreterState *interp) +gc_visit_thread_stacks(PyInterpreterState *interp, struct collection_state *state) { _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { for (_PyInterpreterFrame *f = p->current_frame; f != NULL; f = f->previous) { - PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable); - if (executable == NULL || !PyCode_Check(executable)) { + if (f->owner >= FRAME_OWNED_BY_INTERPRETER) { + continue; + } + + _PyStackRef *top = f->stackpointer; + if (top == NULL) { + // GH-129236: The stackpointer may be NULL. Skip this frame + // and don't collect objects with deferred references. + state->skip_deferred_objects = 1; continue; } - PyCodeObject *co = (PyCodeObject *)executable; - int max_stack = co->co_nlocalsplus + co->co_stacksize; gc_visit_stackref(f->f_executable); - for (int i = 0; i < max_stack; i++) { - gc_visit_stackref(f->localsplus[i]); + while (top != f->localsplus) { + --top; + gc_visit_stackref(*top); } } } @@ -519,10 +526,7 @@ gc_abort_mark_alive(PyInterpreterState *interp, static int gc_visit_stackref_mark_alive(_PyObjectStack *stack, _PyStackRef stackref) { - // Note: we MUST check that it is deferred before checking the rest. - // Otherwise we might read into invalid memory due to non-deferred references - // being dead already. - if (PyStackRef_IsDeferred(stackref) && !PyStackRef_IsNull(stackref)) { + if (!PyStackRef_IsNull(stackref)) { PyObject *op = PyStackRef_AsPyObjectBorrow(stackref); if (mark_alive_stack_push(op, stack) < 0) { return -1; @@ -534,27 +538,37 @@ gc_visit_stackref_mark_alive(_PyObjectStack *stack, _PyStackRef stackref) static int gc_visit_thread_stacks_mark_alive(PyInterpreterState *interp, _PyObjectStack *stack) { + int err = 0; _Py_FOR_EACH_TSTATE_BEGIN(interp, p) { for (_PyInterpreterFrame *f = p->current_frame; f != NULL; f = f->previous) { - PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable); - if (executable == NULL || !PyCode_Check(executable)) { + if (f->owner >= FRAME_OWNED_BY_INTERPRETER) { continue; } - PyCodeObject *co = (PyCodeObject *)executable; - int max_stack = co->co_nlocalsplus + co->co_stacksize; + if (f->stackpointer == NULL) { + // GH-129236: The stackpointer may be NULL in cases where + // the GC is run during a PyStackRef_CLOSE() call. Skip this + // frame for now. + continue; + } + + _PyStackRef *top = f->stackpointer; if (gc_visit_stackref_mark_alive(stack, f->f_executable) < 0) { - return -1; + err = -1; + goto exit; } - for (int i = 0; i < max_stack; i++) { - if (gc_visit_stackref_mark_alive(stack, f->localsplus[i]) < 0) { - return -1; + while (top != f->localsplus) { + --top; + if (gc_visit_stackref_mark_alive(stack, *top) < 0) { + err = -1; + goto exit; } } } } +exit: _Py_FOR_EACH_TSTATE_END(interp); - return 0; + return err; } #endif // GC_MARK_ALIVE_STACKS #endif // GC_ENABLE_MARK_ALIVE @@ -789,14 +803,22 @@ mark_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } - if (gc_is_alive(op)) { + _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0, + "refcount is too small"); + + if (gc_is_alive(op) || !gc_is_unreachable(op)) { + // Object was already marked as reachable. return true; } - _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0, - "refcount is too small"); + // GH-129236: Hack to avoid collect objects with deferred references + // if we see a frame without a valid stack pointer. + struct collection_state *state = (struct collection_state *)args; + if (state->skip_deferred_objects && _PyObject_HasDeferredRefcount(op)) { + gc_add_refs(op, 1); + } - if (gc_is_unreachable(op) && gc_get_refs(op) != 0) { + if (gc_get_refs(op) != 0) { // Object is reachable but currently marked as unreachable. // Mark it as reachable and traverse its pointers to find // any other object that may be directly reachable from it. @@ -985,7 +1007,7 @@ deduce_unreachable_heap(PyInterpreterState *interp, #endif // Visit the thread stacks to account for any deferred references. - gc_visit_thread_stacks(interp); + gc_visit_thread_stacks(interp, state); // Transitively mark reachable objects by clearing the // _PyGC_BITS_UNREACHABLE flag. From fcaee8016bb76aebeb4989cd761d66074c636a26 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 24 Jan 2025 15:44:26 +0000 Subject: [PATCH 2/5] Remove additional frame initialization --- Include/internal/pycore_frame.h | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 3d566a8ad1b7f6..802260cc191b43 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -212,16 +212,6 @@ _PyFrame_Initialize( for (int i = null_locals_from; i < code->co_nlocalsplus; i++) { frame->localsplus[i] = PyStackRef_NULL; } - -#ifdef Py_GIL_DISABLED - // On GIL disabled, we walk the entire stack in GC. Since stacktop - // is not always in sync with the real stack pointer, we have - // no choice but to traverse the entire stack. - // This just makes sure we don't pass the GC invalid stack values. - for (int i = code->co_nlocalsplus; i < code->co_nlocalsplus + code->co_stacksize; i++) { - frame->localsplus[i] = PyStackRef_NULL; - } -#endif } /* Gets the pointer to the locals array @@ -392,13 +382,6 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int frame->owner = FRAME_OWNED_BY_THREAD; frame->visited = 0; frame->return_offset = 0; - -#ifdef Py_GIL_DISABLED - assert(code->co_nlocalsplus == 0); - for (int i = 0; i < code->co_stacksize; i++) { - frame->localsplus[i] = PyStackRef_NULL; - } -#endif return frame; } From ca32c7bc539550c1027bc6bd923d1c491699811f Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 24 Jan 2025 14:38:43 -0500 Subject: [PATCH 3/5] Update Python/gc_free_threading.c Co-authored-by: Kumar Aditya --- Python/gc_free_threading.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 54d37b0ade3481..b194c3fb71e0f1 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -67,7 +67,7 @@ struct collection_state { PyInterpreterState *interp; GCState *gcstate; _PyGC_Reason reason; - // GH-129236: If we see a an active frame without a valid stack pointer, + // GH-129236: If we see an active frame without a valid stack pointer, // we can't collect objects with deferred references because we may not // see all references. int skip_deferred_objects; From 1c8a2e388f637ed2fa295a7481a5b699a23c3fdf Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 24 Jan 2025 19:40:53 +0000 Subject: [PATCH 4/5] Update comment --- Python/gc_free_threading.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index b194c3fb71e0f1..9ccdcd6f469b07 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -437,8 +437,9 @@ gc_visit_thread_stacks(PyInterpreterState *interp, struct collection_state *stat _PyStackRef *top = f->stackpointer; if (top == NULL) { - // GH-129236: The stackpointer may be NULL. Skip this frame - // and don't collect objects with deferred references. + // GH-129236: The stackpointer may be NULL in cases where + // the GC is run during a PyStackRef_CLOSE() call. Skip this + // frame and don't collect objects with deferred references. state->skip_deferred_objects = 1; continue; } From 57a34c2d9aa27898a1cffe549e2c03398601b294 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 24 Jan 2025 19:47:13 +0000 Subject: [PATCH 5/5] Update comment and logic in mark_heap_visitor --- Python/gc_free_threading.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 9ccdcd6f469b07..211d52d6cc7164 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -812,14 +812,15 @@ mark_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } - // GH-129236: Hack to avoid collect objects with deferred references - // if we see a frame without a valid stack pointer. + // GH-129236: If we've seen an active frame without a valid stack pointer, + // then we can't collect objects with deferred references because we may + // have missed some reference to the object on the stack. In that case, + // treat the object as reachable even if gc_refs is zero. struct collection_state *state = (struct collection_state *)args; - if (state->skip_deferred_objects && _PyObject_HasDeferredRefcount(op)) { - gc_add_refs(op, 1); - } + int keep_alive = (state->skip_deferred_objects && + _PyObject_HasDeferredRefcount(op)); - if (gc_get_refs(op) != 0) { + if (gc_get_refs(op) != 0 || keep_alive) { // Object is reachable but currently marked as unreachable. // Mark it as reachable and traverse its pointers to find // any other object that may be directly reachable from it.