Skip to content

Commit e001027

Browse files
gh-117139: Garbage collector support for deferred refcounting (#122956)
The free-threaded GC now visits interpreter stacks to keep objects that use deferred reference counting alive. Interpreter frames are zero-initialized in the free-threaded GC so that the GC doesn't see garbage data. This is a temporary measure until stack spilling around escaping calls is implemented. Co-authored-by: Ken Jin <[email protected]>
1 parent 1dad23e commit e001027

File tree

6 files changed

+122
-21
lines changed

6 files changed

+122
-21
lines changed

Include/internal/pycore_frame.h

+24
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,13 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *
128128
// Don't leave a dangling pointer to the old frame when creating generators
129129
// and coroutines:
130130
dest->previous = NULL;
131+
132+
#ifdef Py_GIL_DISABLED
133+
PyCodeObject *co = (PyCodeObject *)dest->f_executable;
134+
for (int i = stacktop; i < co->co_nlocalsplus + co->co_stacksize; i++) {
135+
dest->localsplus[i] = PyStackRef_NULL;
136+
}
137+
#endif
131138
}
132139

133140
/* Consumes reference to func and locals.
@@ -153,6 +160,16 @@ _PyFrame_Initialize(
153160
for (int i = null_locals_from; i < code->co_nlocalsplus; i++) {
154161
frame->localsplus[i] = PyStackRef_NULL;
155162
}
163+
164+
#ifdef Py_GIL_DISABLED
165+
// On GIL disabled, we walk the entire stack in GC. Since stacktop
166+
// is not always in sync with the real stack pointer, we have
167+
// no choice but to traverse the entire stack.
168+
// This just makes sure we don't pass the GC invalid stack values.
169+
for (int i = code->co_nlocalsplus; i < code->co_nlocalsplus + code->co_stacksize; i++) {
170+
frame->localsplus[i] = PyStackRef_NULL;
171+
}
172+
#endif
156173
}
157174

158175
/* Gets the pointer to the locals array
@@ -314,6 +331,13 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
314331
frame->instr_ptr = _PyCode_CODE(code);
315332
frame->owner = FRAME_OWNED_BY_THREAD;
316333
frame->return_offset = 0;
334+
335+
#ifdef Py_GIL_DISABLED
336+
assert(code->co_nlocalsplus == 0);
337+
for (int i = 0; i < code->co_stacksize; i++) {
338+
frame->localsplus[i] = PyStackRef_NULL;
339+
}
340+
#endif
317341
return frame;
318342
}
319343

Include/internal/pycore_gc.h

+2
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,8 @@ extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
381381
extern void _Py_ScheduleGC(PyThreadState *tstate);
382382
extern void _Py_RunGC(PyThreadState *tstate);
383383

384+
// GC visit callback for tracked interpreter frames
385+
extern int _PyGC_VisitFrameStack(struct _PyInterpreterFrame *frame, visitproc visit, void *arg);
384386

385387
#ifdef __cplusplus
386388
}

Include/internal/pycore_stackref.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,7 @@ PyStackRef_FromPyObjectNew(PyObject *obj)
150150
// Make sure we don't take an already tagged value.
151151
assert(((uintptr_t)obj & Py_TAG_BITS) == 0);
152152
assert(obj != NULL);
153-
// TODO (gh-117139): Add deferred objects later.
154-
if (_Py_IsImmortal(obj)) {
153+
if (_Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj)) {
155154
return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_DEFERRED };
156155
}
157156
else {
@@ -220,7 +219,8 @@ PyStackRef_DUP(_PyStackRef stackref)
220219
{
221220
if (PyStackRef_IsDeferred(stackref)) {
222221
assert(PyStackRef_IsNull(stackref) ||
223-
_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref)));
222+
_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref)) ||
223+
_PyObject_HasDeferredRefcount(PyStackRef_AsPyObjectBorrow(stackref)));
224224
return stackref;
225225
}
226226
Py_INCREF(PyStackRef_AsPyObjectBorrow(stackref));

Python/frame.c

+1-9
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,7 @@ _PyFrame_Traverse(_PyInterpreterFrame *frame, visitproc visit, void *arg)
1515
Py_VISIT(frame->f_locals);
1616
Py_VISIT(frame->f_funcobj);
1717
Py_VISIT(_PyFrame_GetCode(frame));
18-
/* locals */
19-
_PyStackRef *locals = _PyFrame_GetLocalsArray(frame);
20-
_PyStackRef *sp = frame->stackpointer;
21-
/* locals and stack */
22-
while (sp > locals) {
23-
sp--;
24-
Py_VISIT(PyStackRef_AsPyObjectBorrow(*sp));
25-
}
26-
return 0;
18+
return _PyGC_VisitFrameStack(frame, visit, arg);
2719
}
2820

2921
PyFrameObject *

Python/gc.c

+11
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,17 @@ visit_decref(PyObject *op, void *parent)
534534
return 0;
535535
}
536536

537+
int
538+
_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
539+
{
540+
_PyStackRef *ref = _PyFrame_GetLocalsArray(frame);
541+
/* locals and stack */
542+
for (; ref < frame->stackpointer; ref++) {
543+
Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
544+
}
545+
return 0;
546+
}
547+
537548
/* Subtract internal references from gc_refs. After this, gc_refs is >= 0
538549
* for all objects in containers, and is GC_REACHABLE for all tracked gc
539550
* objects not in containers. The ones with gc_refs > 0 are directly

Python/gc_free_threading.c

+81-9
Original file line numberDiff line numberDiff line change
@@ -164,15 +164,31 @@ gc_decref(PyObject *op)
164164
static void
165165
disable_deferred_refcounting(PyObject *op)
166166
{
167-
if (_PyObject_HasDeferredRefcount(op)) {
168-
op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
169-
op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
170-
171-
if (PyType_Check(op)) {
172-
// Disable thread-local refcounting for heap types
173-
PyTypeObject *type = (PyTypeObject *)op;
174-
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
175-
_PyType_ReleaseId((PyHeapTypeObject *)op);
167+
if (!_PyObject_HasDeferredRefcount(op)) {
168+
return;
169+
}
170+
171+
op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED;
172+
op->ob_ref_shared -= _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
173+
174+
if (PyType_Check(op)) {
175+
// Disable thread-local refcounting for heap types
176+
PyTypeObject *type = (PyTypeObject *)op;
177+
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
178+
_PyType_ReleaseId((PyHeapTypeObject *)op);
179+
}
180+
}
181+
else if (PyGen_CheckExact(op) || PyCoro_CheckExact(op) || PyAsyncGen_CheckExact(op)) {
182+
// Ensure any non-refcounted pointers in locals are converted to
183+
// strong references. This ensures that the generator/coroutine is not
184+
// freed before its locals.
185+
PyGenObject *gen = (PyGenObject *)op;
186+
struct _PyInterpreterFrame *frame = &gen->gi_iframe;
187+
assert(frame->stackpointer != NULL);
188+
for (_PyStackRef *ref = frame->localsplus; ref < frame->stackpointer; ref++) {
189+
if (!PyStackRef_IsNull(*ref) && PyStackRef_IsDeferred(*ref)) {
190+
// Convert a deferred reference to a strong reference.
191+
*ref = PyStackRef_FromPyObjectSteal(PyStackRef_AsPyObjectSteal(*ref));
176192
}
177193
}
178194
}
@@ -313,6 +329,41 @@ gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor,
313329
return err;
314330
}
315331

332+
static inline void
333+
gc_visit_stackref(_PyStackRef stackref)
334+
{
335+
// Note: we MUST check that it is deferred before checking the rest.
336+
// Otherwise we might read into invalid memory due to non-deferred references
337+
// being dead already.
338+
if (PyStackRef_IsDeferred(stackref) && !PyStackRef_IsNull(stackref)) {
339+
PyObject *obj = PyStackRef_AsPyObjectBorrow(stackref);
340+
if (_PyObject_GC_IS_TRACKED(obj)) {
341+
gc_add_refs(obj, 1);
342+
}
343+
}
344+
}
345+
346+
// Add 1 to the gc_refs for every deferred reference on each thread's stack.
347+
static void
348+
gc_visit_thread_stacks(PyInterpreterState *interp)
349+
{
350+
HEAD_LOCK(&_PyRuntime);
351+
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
352+
_PyInterpreterFrame *f = p->current_frame;
353+
while (f != NULL) {
354+
if (f->f_executable != NULL && PyCode_Check(f->f_executable)) {
355+
PyCodeObject *co = (PyCodeObject *)f->f_executable;
356+
int max_stack = co->co_nlocalsplus + co->co_stacksize;
357+
for (int i = 0; i < max_stack; i++) {
358+
gc_visit_stackref(f->localsplus[i]);
359+
}
360+
}
361+
f = f->previous;
362+
}
363+
}
364+
HEAD_UNLOCK(&_PyRuntime);
365+
}
366+
316367
static void
317368
merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
318369
{
@@ -617,6 +668,9 @@ deduce_unreachable_heap(PyInterpreterState *interp,
617668
gc_visit_heaps(interp, &validate_gc_objects, &state->base);
618669
#endif
619670

671+
// Visit the thread stacks to account for any deferred references.
672+
gc_visit_thread_stacks(interp);
673+
620674
// Transitively mark reachable objects by clearing the
621675
// _PyGC_BITS_UNREACHABLE flag.
622676
if (gc_visit_heaps(interp, &mark_heap_visitor, &state->base) < 0) {
@@ -897,6 +951,24 @@ visit_decref_unreachable(PyObject *op, void *data)
897951
return 0;
898952
}
899953

954+
int
955+
_PyGC_VisitFrameStack(_PyInterpreterFrame *frame, visitproc visit, void *arg)
956+
{
957+
_PyStackRef *ref = _PyFrame_GetLocalsArray(frame);
958+
/* locals and stack */
959+
for (; ref < frame->stackpointer; ref++) {
960+
// This is a bit tricky! We want to ignore deferred references when
961+
// computing the incoming references, but otherwise treat them like
962+
// regular references.
963+
if (PyStackRef_IsDeferred(*ref) &&
964+
(visit == visit_decref || visit == visit_decref_unreachable)) {
965+
continue;
966+
}
967+
Py_VISIT(PyStackRef_AsPyObjectBorrow(*ref));
968+
}
969+
return 0;
970+
}
971+
900972
// Handle objects that may have resurrected after a call to 'finalize_garbage'.
901973
static int
902974
handle_resurrected_objects(struct collection_state *state)

0 commit comments

Comments
 (0)