From 3038a78fc50757dce9ec2ae50a19708a7a9bed42 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sat, 9 Nov 2024 11:30:49 +0000 Subject: [PATCH 01/23] Faster marking of reachable objects --- Python/gc.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 60b41377654af4..4a012d01d1f254 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1433,6 +1433,7 @@ move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) return 0; } + static Py_ssize_t mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) { @@ -1442,18 +1443,77 @@ mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) .visited_space = visited_space, .size = 0 }; + Py_ssize_t objects_marked = 0; while (!gc_list_is_empty(reachable)) { PyGC_Head *gc = _PyGCHead_NEXT(reachable); assert(gc_old_space(gc) == visited_space); gc_list_move(gc, visited); PyObject *op = FROM_GC(gc); - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); + assert(PyObject_IS_GC(op)); + switch(Py_TYPE(op)->tp_version_tag) { + case _Py_TYPE_VERSION_LIST: + { + PyListObject *o = (PyListObject *)op; + Py_ssize_t i; + for (i = Py_SIZE(o); --i >= 0; ) { + PyObject *item = o->ob_item[i]; + objects_marked += move_to_reachable(item, reachable, visited_space); + } + break; + } + case _Py_TYPE_VERSION_TUPLE: + { + PyTupleObject *o = (PyTupleObject *)op; + for (Py_ssize_t i = Py_SIZE(o); --i >= 0; ) { + PyObject *item = o->ob_item[i]; + objects_marked += move_to_reachable(item, reachable, visited_space); + } + break; + } + case _Py_TYPE_VERSION_DICT: + { + PyDictObject *mp = (PyDictObject *)op; + PyDictKeysObject *keys = mp->ma_keys; + Py_ssize_t i, n = keys->dk_nentries; + if (DK_IS_UNICODE(keys)) { + if (_PyDict_HasSplitTable(mp)) { + if (!mp->ma_values->embedded) { + for (i = 0; i < n; i++) { + PyObject *value = mp->ma_values->values[i]; + objects_marked += move_to_reachable(value, reachable, visited_space); + } + } + } + else { + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); + for (i = 0; i < n; i++) { + PyObject *value = entries[i].me_value; + objects_marked += move_to_reachable(value, reachable, visited_space); + } + } + } + else { + PyDictKeyEntry *entries = DK_ENTRIES(keys); + for (i = 0; i < n; i++) { + if (entries[i].me_value != NULL) { + PyObject *key = entries[i].me_key; + objects_marked += move_to_reachable(key, reachable, visited_space); + PyObject *value = entries[i].me_value; + objects_marked += move_to_reachable(value, reachable, visited_space); + } + } + } + break; + } + default: + { + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, visit_add_to_container, &arg); + } + } } gc_list_validate_space(visited, visited_space); - return arg.size; + return objects_marked + arg.size; } static Py_ssize_t From c024484826464a563bea4e379f23ad870c3b5c95 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 10 Nov 2024 10:12:39 +0000 Subject: [PATCH 02/23] Handle more classes in fast marking --- Include/internal/pycore_typeobject.h | 4 + Objects/genobject.c | 41 +---------- Objects/moduleobject.c | 1 + Objects/setobject.c | 5 ++ Objects/typeobject.c | 1 + Python/gc.c | 106 +++++++++++++++++++++------ 6 files changed, 100 insertions(+), 58 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 5debdd68fe94ca..431466c7b0bba0 100644 --- a/Include/internal/pycore_typeobject.h +++ 
b/Include/internal/pycore_typeobject.h @@ -25,6 +25,10 @@ extern "C" { #define _Py_TYPE_VERSION_BYTEARRAY 9 #define _Py_TYPE_VERSION_BYTES 10 #define _Py_TYPE_VERSION_COMPLEX 11 +#define _Py_TYPE_VERSION_GENERATOR 12 +#define _Py_TYPE_VERSION_COROUTINE 13 +#define _Py_TYPE_VERSION_MODULE 14 +#define _Py_TYPE_VERSION_TYPE 15 #define _Py_TYPE_VERSION_NEXT 16 diff --git a/Objects/genobject.c b/Objects/genobject.c index 19c2c4e3331a89..33fbea1a5fe6f4 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -876,25 +876,8 @@ PyTypeObject PyGen_Type = { gen_methods, /* tp_methods */ gen_memberlist, /* tp_members */ gen_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, + .tp_version_tag = _Py_TYPE_VERSION_GENERATOR, }; static PyObject * @@ -1236,24 +1219,8 @@ PyTypeObject PyCoro_Type = { coro_methods, /* tp_methods */ coro_memberlist, /* tp_members */ coro_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, + .tp_version_tag = _Py_TYPE_VERSION_COROUTINE, }; static void diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index a8d64c9aefae6b..dee3d2d0395191 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -1356,4 +1356,5 @@ PyTypeObject PyModule_Type = { 0, /* tp_alloc */ new_module, /* tp_new */ PyObject_GC_Del, /* tp_free */ + .tp_version_tag = _Py_TYPE_VERSION_MODULE, }; diff --git a/Objects/setobject.c b/Objects/setobject.c index 955ccbebf74b54..a057d19709132e 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -492,6 +492,11 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) return 1; } +int _PyTemporary_SetNext(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) +{ + return set_next(so, pos_ptr, entry_ptr); +} + static void set_dealloc(PyObject *self) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a6cf3da542b691..ef40ebb6d36c67 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6351,6 +6351,7 @@ PyTypeObject PyType_Type = { PyObject_GC_Del, /* tp_free */ type_is_gc, /* tp_is_gc */ .tp_vectorcall = type_vectorcall, + .tp_version_tag = _Py_TYPE_VERSION_TYPE, }; diff --git a/Python/gc.c b/Python/gc.c index 4a012d01d1f254..6ad4c4df861058 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1433,6 +1433,40 @@ move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) return 0; } +static Py_ssize_t +move_frame_to_reachable(_PyInterpreterFrame *frame, PyGC_Head *reachable, int visited_space) +{ + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + Py_ssize_t objects_marked = move_to_reachable(frame->f_locals, reachable, visited_space); + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + objects_marked += move_to_reachable(func, reachable, 
visited_space); + while (sp > locals) { + sp--; + if (PyStackRef_IsNull(*sp)) { + continue; + } + PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); + if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + objects_marked++; + gc_list_move(gc, reachable); + } + } + } + return objects_marked; +} + +/* This should be refactored: + * 1. Make `move_to_reachable` an inline "private API" function + * 2. Move bodies of each case below into a _PyXXX_MarkReachable function in file for type XXX. + * 3. Trust lto to inline those functions. + */ + +extern int _PyTemporary_SetNext(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr); static Py_ssize_t mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) @@ -1470,6 +1504,19 @@ mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) } break; } + case _Py_TYPE_VERSION_MODULE: + { + PyModuleObject *m = (PyModuleObject *)op; + /* bpo-39824: Don't call m_traverse() if m_size > 0 and md_state=NULL */ + if (m->md_def && m->md_def->m_traverse + && (m->md_def->m_size <= 0 || m->md_state != NULL)) + { + m->md_def->m_traverse(op, visit_add_to_container, &arg); + } + op = m->md_dict; + assert (op != NULL); + } + /* fall through */ case _Py_TYPE_VERSION_DICT: { PyDictObject *mp = (PyDictObject *)op; @@ -1505,6 +1552,43 @@ mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) } break; } + case _Py_TYPE_VERSION_SET: + case _Py_TYPE_VERSION_FROZEN_SET: + { + PySetObject *so = (PySetObject *)op; + Py_ssize_t pos = 0; + setentry *entry; + while (_PyTemporary_SetNext(so, &pos, &entry)) { + objects_marked += move_to_reachable(entry->key, reachable, visited_space); + } + break; + } + case _Py_TYPE_VERSION_COROUTINE: + case _Py_TYPE_VERSION_GENERATOR: + { + PyGenObject *gen = (PyGenObject *)op; + if (gen->gi_frame_state == FRAME_CLEARED) { + break; + } + objects_marked += move_to_reachable(gen->gi_exc_state.exc_value, reachable, visited_space); + if (gen->gi_frame_state != FRAME_EXECUTING) { + /* if executing we already traversed it on the stack */ + _PyInterpreterFrame *frame = &gen->gi_iframe; + objects_marked += move_frame_to_reachable(frame, reachable, visited_space); + } + break; + } + case _Py_TYPE_VERSION_TYPE: + { + PyTypeObject *type = (PyTypeObject *)op; + objects_marked += move_to_reachable(type->tp_dict, reachable, visited_space); + objects_marked += move_to_reachable(type->tp_cache, reachable, visited_space); + objects_marked += move_to_reachable(type->tp_mro, reachable, visited_space); + objects_marked += move_to_reachable(type->tp_bases, reachable, visited_space); + objects_marked += move_to_reachable((PyObject *)type->tp_base, reachable, visited_space); + objects_marked += move_to_reachable(((PyHeapTypeObject *)type)->ht_module, reachable, visited_space); + break; + } default: { traverseproc traverse = Py_TYPE(op)->tp_traverse; @@ -1557,27 +1641,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b frame = frame->previous; continue; } - _PyStackRef *locals = frame->localsplus; - _PyStackRef *sp = frame->stackpointer; - objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); - PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - objects_marked += move_to_reachable(func, &reachable, visited_space); - while (sp > locals) { - sp--; - if (PyStackRef_IsNull(*sp)) { - continue; - } - PyObject 
*op = PyStackRef_AsPyObjectBorrow(*sp); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - objects_marked++; - gc_list_move(gc, &reachable); - } - } - } + objects_marked += move_frame_to_reachable(frame, &reachable, visited_space); if (!start && frame->visited) { // If this frame has already been visited, then the lower frames // will have already been visited and will not have changed From e8497ae4285efee06ee97bbb02bf851e9d7bfba3 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 11 Nov 2024 14:35:58 +0000 Subject: [PATCH 03/23] Add support for async generators on fast path. Simplify counting --- Include/internal/pycore_frame.h | 3 + Include/internal/pycore_gc.h | 2 + Include/internal/pycore_typeobject.h | 4 +- Objects/dictobject.c | 36 +++++++ Objects/genobject.c | 63 ++++++------ Objects/listobject.c | 11 +++ Objects/setobject.c | 16 ++- Objects/tupleobject.c | 10 ++ Objects/typeobject.c | 13 +++ Python/gc.c | 142 ++++++++------------------- 10 files changed, 165 insertions(+), 135 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index b786c5f49e9831..45bf6e06ef4223 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -409,6 +409,9 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func, size_t argcount, PyObject *kwnames, _PyInterpreterFrame *previous); +void +_PyFrame_MoveToReachable(_PyInterpreterFrame *frame, PyGC_Head *reachable, int visited_space); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index c81e7a1de4b727..a1ac940d6900c1 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -401,6 +401,8 @@ extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp, gcvisitobjects_t callback, void *arg); #endif +void _PyGC_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 431466c7b0bba0..4b8ea552fc3e58 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -29,8 +29,10 @@ extern "C" { #define _Py_TYPE_VERSION_COROUTINE 13 #define _Py_TYPE_VERSION_MODULE 14 #define _Py_TYPE_VERSION_TYPE 15 +#define _Py_TYPE_VERSION_ASYNC_GENERATOR 16 +#define _Py_TYPE_VERSION_ASYNC_ASEND 17 -#define _Py_TYPE_VERSION_NEXT 16 +#define _Py_TYPE_VERSION_NEXT 20 #define _Py_TYPE_BASE_VERSION_TAG (2<<16) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 19a3ba9e13e5cc..0e43498ed75f22 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -4475,6 +4475,42 @@ dict_popitem_impl(PyDictObject *self) return res; } +void +_PyDict_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + PyDictObject *mp = (PyDictObject *)op; + PyDictKeysObject *keys = mp->ma_keys; + Py_ssize_t i, n = keys->dk_nentries; + if (DK_IS_UNICODE(keys)) { + if (_PyDict_HasSplitTable(mp)) { + if (!mp->ma_values->embedded) { + for (i = 0; i < n; i++) { + PyObject *value = mp->ma_values->values[i]; + _PyGC_MoveToReachable(value, reachable, visited_space); + } + } + } + else { + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); + for (i = 0; i < n; i++) { + PyObject *value = entries[i].me_value; + _PyGC_MoveToReachable(value, reachable, visited_space); + } + } + } + else { + PyDictKeyEntry *entries = 
DK_ENTRIES(keys); + for (i = 0; i < n; i++) { + if (entries[i].me_value != NULL) { + PyObject *key = entries[i].me_key; + _PyGC_MoveToReachable(key, reachable, visited_space); + PyObject *value = entries[i].me_value; + _PyGC_MoveToReachable(value, reachable, visited_space); + } + } + } +} + static int dict_traverse(PyObject *op, visitproc visit, void *arg) { diff --git a/Objects/genobject.c b/Objects/genobject.c index 33fbea1a5fe6f4..a339f7e99b0c72 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -78,6 +78,22 @@ gen_traverse(PyObject *self, visitproc visit, void *arg) return 0; } +void +_PyGen_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + PyGenObject *gen = (PyGenObject *)op; + if (gen->gi_frame_state == FRAME_CLEARED) { + return; + } + _PyGC_MoveToReachable(gen->gi_exc_state.exc_value, reachable, visited_space); + if (gen->gi_frame_state == FRAME_EXECUTING) { + /* if executing we already traversed it on the stack */ + return; + } + _PyInterpreterFrame *frame = &gen->gi_iframe; + _PyFrame_MoveToReachable(frame, reachable, visited_space); +} + void _PyGen_Finalize(PyObject *self) { @@ -1426,6 +1442,14 @@ typedef struct _PyAsyncGenWrappedValue { _Py_CAST(_PyAsyncGenWrappedValue*, (op))) +void +_PyAsyncGen_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + PyAsyncGenObject *ag = _PyAsyncGenObject_CAST(op); + _PyGC_MoveToReachable(ag->ag_origin_or_finalizer, reachable, visited_space); + _PyGen_MoveToReachable(op, reachable, visited_space); +} + static int async_gen_traverse(PyObject *self, visitproc visit, void *arg) { @@ -1634,24 +1658,8 @@ PyTypeObject PyAsyncGen_Type = { async_gen_methods, /* tp_methods */ async_gen_memberlist, /* tp_members */ async_gen_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, + .tp_version_tag = _Py_TYPE_VERSION_ASYNC_GENERATOR, }; @@ -1724,6 +1732,14 @@ async_gen_asend_dealloc(PyObject *self) _Py_FREELIST_FREE(async_gen_asends, self, PyObject_GC_Del); } +void +_PyAsyncAsend_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + PyAsyncGenASend *ags = _PyAsyncGenASend_CAST(op); + _PyGC_MoveToReachable((PyObject *)ags->ags_sendval, reachable, visited_space); + _PyAsyncGen_MoveToReachable((PyObject *)ags->ags_gen, reachable, visited_space); +} + static int async_gen_asend_traverse(PyObject *self, visitproc visit, void *arg) { @@ -1896,17 +1912,8 @@ PyTypeObject _PyAsyncGenASend_Type = { PyObject_SelfIter, /* tp_iter */ async_gen_asend_iternext, /* tp_iternext */ async_gen_asend_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ .tp_finalize = async_gen_asend_finalize, + .tp_version_tag = _Py_TYPE_VERSION_ASYNC_ASEND, }; diff --git a/Objects/listobject.c b/Objects/listobject.c index bb0040cbe9f272..dbadfaa056c9de 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3276,6 +3276,17 @@ list_remove_impl(PyListObject *self, PyObject *value) return NULL; } +void 
+_PyList_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + PyListObject *o = (PyListObject *)op; + Py_ssize_t i; + for (i = Py_SIZE(o); --i >= 0; ) { + PyObject *item = o->ob_item[i]; + _PyGC_MoveToReachable(item, reachable, visited_space); + } +} + static int list_traverse(PyObject *self, visitproc visit, void *arg) { diff --git a/Objects/setobject.c b/Objects/setobject.c index a057d19709132e..4404ed3b8cc37b 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -492,11 +492,6 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) return 1; } -int _PyTemporary_SetNext(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) -{ - return set_next(so, pos_ptr, entry_ptr); -} - static void set_dealloc(PyObject *self) { @@ -705,6 +700,17 @@ set_traverse(PyObject *self, visitproc visit, void *arg) return 0; } +void +_PySet_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + PySetObject *so = (PySetObject *)op; + Py_ssize_t pos = 0; + setentry *entry; + while (set_next(so, &pos, &entry)) { + _PyGC_MoveToReachable(entry->key, reachable, visited_space); + } +} + /* Work to increase the bit dispersion for closely spaced hash values. This is important because some use cases have many combinations of a small number of elements with nearby hashes so that many distinct diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 193914d54bd90e..b108ed582d8fc0 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -623,6 +623,16 @@ tuple_count(PyTupleObject *self, PyObject *value) return PyLong_FromSsize_t(count); } +void +_PyTuple_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + PyTupleObject *o = (PyTupleObject *)op; + for (Py_ssize_t i = Py_SIZE(o); --i >= 0; ) { + PyObject *item = o->ob_item[i]; + _PyGC_MoveToReachable(item, reachable, visited_space); + } +} + static int tuple_traverse(PyObject *self, visitproc visit, void *arg) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index ef40ebb6d36c67..ee8629d59fe555 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6214,6 +6214,19 @@ PyDoc_STRVAR(type_doc, "type(object) -> the object's type\n" "type(name, bases, dict, **kwds) -> a new type"); + +void +_PyType_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + PyTypeObject *type = (PyTypeObject *)op; + _PyGC_MoveToReachable(type->tp_dict, reachable, visited_space); + _PyGC_MoveToReachable(type->tp_cache, reachable, visited_space); + _PyGC_MoveToReachable(type->tp_mro, reachable, visited_space); + _PyGC_MoveToReachable(type->tp_bases, reachable, visited_space); + _PyGC_MoveToReachable((PyObject *)type->tp_base, reachable, visited_space); + _PyGC_MoveToReachable(((PyHeapTypeObject *)type)->ht_module, reachable, visited_space); +} + static int type_traverse(PyObject *self, visitproc visit, void *arg) { diff --git a/Python/gc.c b/Python/gc.c index 6ad4c4df861058..2c00d83b789b24 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1418,8 +1418,8 @@ completed_cycle(GCState *gcstate) gcstate->phase = GC_PHASE_MARK; } -static Py_ssize_t -move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) +void +_PyGC_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) { if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); @@ -1427,20 +1427,18 @@ move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) gc_old_space(gc) != visited_space) { gc_flip_old_space(gc); 
gc_list_move(gc, reachable); - return 1; } } - return 0; } -static Py_ssize_t -move_frame_to_reachable(_PyInterpreterFrame *frame, PyGC_Head *reachable, int visited_space) +void +_PyFrame_MoveToReachable(_PyInterpreterFrame *frame, PyGC_Head *reachable, int visited_space) { _PyStackRef *locals = frame->localsplus; _PyStackRef *sp = frame->stackpointer; - Py_ssize_t objects_marked = move_to_reachable(frame->f_locals, reachable, visited_space); + _PyGC_MoveToReachable(frame->f_locals, reachable, visited_space); PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - objects_marked += move_to_reachable(func, reachable, visited_space); + _PyGC_MoveToReachable(func, reachable, visited_space); while (sp > locals) { sp--; if (PyStackRef_IsNull(*sp)) { @@ -1452,12 +1450,10 @@ move_frame_to_reachable(_PyInterpreterFrame *frame, PyGC_Head *reachable, int vi if (_PyObject_GC_IS_TRACKED(op) && gc_old_space(gc) != visited_space) { gc_flip_old_space(gc); - objects_marked++; gc_list_move(gc, reachable); } } } - return objects_marked; } /* This should be refactored: @@ -1466,7 +1462,15 @@ move_frame_to_reachable(_PyInterpreterFrame *frame, PyGC_Head *reachable, int vi * 3. Trust lto to inline those functions. */ -extern int _PyTemporary_SetNext(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr); +extern void _PySet_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); +extern void _PyDict_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); +extern void _PyGen_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); +extern void _PyAsyncGen_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); +extern void _PyAsyncAsend_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); +extern void _PyList_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); +extern void _PyTuple_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); +extern void _PyType_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); + static Py_ssize_t mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) @@ -1479,31 +1483,20 @@ mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) }; Py_ssize_t objects_marked = 0; while (!gc_list_is_empty(reachable)) { - PyGC_Head *gc = _PyGCHead_NEXT(reachable); + /* Pop from the back of the list, to do depth first traversal */ + PyGC_Head *gc = _PyGCHead_PREV(reachable); assert(gc_old_space(gc) == visited_space); gc_list_move(gc, visited); + objects_marked++; PyObject *op = FROM_GC(gc); assert(PyObject_IS_GC(op)); switch(Py_TYPE(op)->tp_version_tag) { case _Py_TYPE_VERSION_LIST: - { - PyListObject *o = (PyListObject *)op; - Py_ssize_t i; - for (i = Py_SIZE(o); --i >= 0; ) { - PyObject *item = o->ob_item[i]; - objects_marked += move_to_reachable(item, reachable, visited_space); - } + _PyList_MoveToReachable(op, reachable, visited_space); break; - } case _Py_TYPE_VERSION_TUPLE: - { - PyTupleObject *o = (PyTupleObject *)op; - for (Py_ssize_t i = Py_SIZE(o); --i >= 0; ) { - PyObject *item = o->ob_item[i]; - objects_marked += move_to_reachable(item, reachable, visited_space); - } + _PyTuple_MoveToReachable(op, reachable, visited_space); break; - } case _Py_TYPE_VERSION_MODULE: { PyModuleObject *m = (PyModuleObject *)op; @@ -1518,77 +1511,25 @@ mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) } /* fall through */ case _Py_TYPE_VERSION_DICT: - { - PyDictObject *mp = 
(PyDictObject *)op; - PyDictKeysObject *keys = mp->ma_keys; - Py_ssize_t i, n = keys->dk_nentries; - if (DK_IS_UNICODE(keys)) { - if (_PyDict_HasSplitTable(mp)) { - if (!mp->ma_values->embedded) { - for (i = 0; i < n; i++) { - PyObject *value = mp->ma_values->values[i]; - objects_marked += move_to_reachable(value, reachable, visited_space); - } - } - } - else { - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); - for (i = 0; i < n; i++) { - PyObject *value = entries[i].me_value; - objects_marked += move_to_reachable(value, reachable, visited_space); - } - } - } - else { - PyDictKeyEntry *entries = DK_ENTRIES(keys); - for (i = 0; i < n; i++) { - if (entries[i].me_value != NULL) { - PyObject *key = entries[i].me_key; - objects_marked += move_to_reachable(key, reachable, visited_space); - PyObject *value = entries[i].me_value; - objects_marked += move_to_reachable(value, reachable, visited_space); - } - } - } + _PyDict_MoveToReachable(op, reachable, visited_space); break; - } case _Py_TYPE_VERSION_SET: case _Py_TYPE_VERSION_FROZEN_SET: - { - PySetObject *so = (PySetObject *)op; - Py_ssize_t pos = 0; - setentry *entry; - while (_PyTemporary_SetNext(so, &pos, &entry)) { - objects_marked += move_to_reachable(entry->key, reachable, visited_space); - } + _PySet_MoveToReachable(op, reachable, visited_space); + break; + case _Py_TYPE_VERSION_ASYNC_GENERATOR: + _PyAsyncGen_MoveToReachable(op, reachable, visited_space); + break; + case _Py_TYPE_VERSION_ASYNC_ASEND: + _PyAsyncAsend_MoveToReachable(op, reachable, visited_space); break; - } case _Py_TYPE_VERSION_COROUTINE: case _Py_TYPE_VERSION_GENERATOR: - { - PyGenObject *gen = (PyGenObject *)op; - if (gen->gi_frame_state == FRAME_CLEARED) { - break; - } - objects_marked += move_to_reachable(gen->gi_exc_state.exc_value, reachable, visited_space); - if (gen->gi_frame_state != FRAME_EXECUTING) { - /* if executing we already traversed it on the stack */ - _PyInterpreterFrame *frame = &gen->gi_iframe; - objects_marked += move_frame_to_reachable(frame, reachable, visited_space); - } + _PyGen_MoveToReachable(op, reachable, visited_space); break; - } case _Py_TYPE_VERSION_TYPE: - { - PyTypeObject *type = (PyTypeObject *)op; - objects_marked += move_to_reachable(type->tp_dict, reachable, visited_space); - objects_marked += move_to_reachable(type->tp_cache, reachable, visited_space); - objects_marked += move_to_reachable(type->tp_mro, reachable, visited_space); - objects_marked += move_to_reachable(type->tp_bases, reachable, visited_space); - objects_marked += move_to_reachable((PyObject *)type->tp_base, reachable, visited_space); - objects_marked += move_to_reachable(((PyHeapTypeObject *)type)->ht_module, reachable, visited_space); + _PyType_MoveToReachable(op, reachable, visited_space); break; - } default: { traverseproc traverse = Py_TYPE(op)->tp_traverse; @@ -1597,7 +1538,7 @@ mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) } } gc_list_validate_space(visited, visited_space); - return objects_marked + arg.size; + return objects_marked; } static Py_ssize_t @@ -1606,21 +1547,21 @@ mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_sp PyGC_Head reachable; gc_list_init(&reachable); Py_ssize_t objects_marked = 0; - objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); - objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); - objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + _PyGC_MoveToReachable(interp->sysdict, 
&reachable, visited_space); + _PyGC_MoveToReachable(interp->builtins, &reachable, visited_space); + _PyGC_MoveToReachable(interp->dict, &reachable, visited_space); struct types_state *types = &interp->types; for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + _PyGC_MoveToReachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); + _PyGC_MoveToReachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); } for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { - objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + _PyGC_MoveToReachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); + _PyGC_MoveToReachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); } objects_marked += mark_all_reachable(&reachable, visited, visited_space); assert(gc_list_is_empty(&reachable)); - return objects_marked; + return objects_marked + 20; } static Py_ssize_t @@ -1628,7 +1569,6 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b { PyGC_Head reachable; gc_list_init(&reachable); - Py_ssize_t objects_marked = 0; // Move all objects on stacks to reachable _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); @@ -1641,7 +1581,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b frame = frame->previous; continue; } - objects_marked += move_frame_to_reachable(frame, &reachable, visited_space); + _PyFrame_MoveToReachable(frame, &reachable, visited_space); if (!start && frame->visited) { // If this frame has already been visited, then the lower frames // will have already been visited and will not have changed @@ -1654,7 +1594,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b ts = PyThreadState_Next(ts); HEAD_UNLOCK(runtime); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); + Py_ssize_t objects_marked = mark_all_reachable(&reachable, visited, visited_space); assert(gc_list_is_empty(&reachable)); return objects_marked; } From 4c1a6bcf5e30a4801e4cc6cfb8617fc944f1183b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 11 Nov 2024 16:16:46 +0000 Subject: [PATCH 04/23] Check stackref before converting to PyObject * --- Include/internal/pycore_stackref.h | 2 ++ Python/gc.c | 28 ++++++++++++---------------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 588e57f6cd97e0..859cdcaf07d984 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -190,6 +190,8 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; #endif // Py_GIL_DISABLED +#define PyStackRef_IsNonNullMortal(stackref) (!PyStackRef_IsNull(stackref) && !_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref))) + // Note: this is a macro because MSVC (Windows) has trouble inlining it. 
#define PyStackRef_Is(a, b) ((a).bits == (b).bits) diff --git a/Python/gc.c b/Python/gc.c index 2c00d83b789b24..1f85c9b7708cd0 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1431,6 +1431,8 @@ _PyGC_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) } } +/* TO DO -- Move this into pycore_stackref.h */ + void _PyFrame_MoveToReachable(_PyInterpreterFrame *frame, PyGC_Head *reachable, int visited_space) { @@ -1441,27 +1443,21 @@ _PyFrame_MoveToReachable(_PyInterpreterFrame *frame, PyGC_Head *reachable, int v _PyGC_MoveToReachable(func, reachable, visited_space); while (sp > locals) { sp--; - if (PyStackRef_IsNull(*sp)) { - continue; - } - PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - gc_list_move(gc, reachable); + _PyStackRef ref = *sp; + if (PyStackRef_IsNonNullMortal(ref)) { + PyObject *op = PyStackRef_AsPyObjectBorrow(ref); + if (_PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + gc_list_move(gc, reachable); + } } } } } -/* This should be refactored: - * 1. Make `move_to_reachable` an inline "private API" function - * 2. Move bodies of each case below into a _PyXXX_MarkReachable function in file for type XXX. - * 3. Trust lto to inline those functions. - */ - extern void _PySet_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); extern void _PyDict_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); extern void _PyGen_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); From 6efb4c0e6c0e34e93900c49e622250fdb94b390f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 13 Nov 2024 12:22:47 +0000 Subject: [PATCH 05/23] Rename stuff --- Include/internal/pycore_frame.h | 2 +- Include/internal/pycore_gc.h | 2 +- Objects/dictobject.c | 10 ++--- Objects/genobject.c | 18 ++++---- Objects/listobject.c | 4 +- Objects/setobject.c | 4 +- Objects/tupleobject.c | 4 +- Objects/typeobject.c | 14 +++--- Python/gc.c | 77 ++++++++++++++++++--------------- 9 files changed, 71 insertions(+), 64 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 45bf6e06ef4223..3fbdb68ac543c3 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -410,7 +410,7 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func, _PyInterpreterFrame *previous); void -_PyFrame_MoveToReachable(_PyInterpreterFrame *frame, PyGC_Head *reachable, int visited_space); +_PyFrame_MoveUnvisited(_PyInterpreterFrame *frame, PyGC_Head *to, int visited_space); #ifdef __cplusplus } diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index a1ac940d6900c1..e2d50fb7c8a8a7 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -401,7 +401,7 @@ extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp, gcvisitobjects_t callback, void *arg); #endif -void _PyGC_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); +void _PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); #ifdef __cplusplus } diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 0e43498ed75f22..794c62f11b8dc6 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -4476,7 +4476,7 @@ dict_popitem_impl(PyDictObject *self) } void 
-_PyDict_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +_PyDict_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { PyDictObject *mp = (PyDictObject *)op; PyDictKeysObject *keys = mp->ma_keys; @@ -4486,7 +4486,7 @@ _PyDict_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) if (!mp->ma_values->embedded) { for (i = 0; i < n; i++) { PyObject *value = mp->ma_values->values[i]; - _PyGC_MoveToReachable(value, reachable, visited_space); + _PyGC_MoveUnvisited(value, to, visited_space); } } } @@ -4494,7 +4494,7 @@ _PyDict_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); for (i = 0; i < n; i++) { PyObject *value = entries[i].me_value; - _PyGC_MoveToReachable(value, reachable, visited_space); + _PyGC_MoveUnvisited(value, to, visited_space); } } } @@ -4503,9 +4503,9 @@ _PyDict_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) for (i = 0; i < n; i++) { if (entries[i].me_value != NULL) { PyObject *key = entries[i].me_key; - _PyGC_MoveToReachable(key, reachable, visited_space); + _PyGC_MoveUnvisited(key, to, visited_space); PyObject *value = entries[i].me_value; - _PyGC_MoveToReachable(value, reachable, visited_space); + _PyGC_MoveUnvisited(value, to, visited_space); } } } diff --git a/Objects/genobject.c b/Objects/genobject.c index a339f7e99b0c72..2d5e2e094afb00 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -79,19 +79,19 @@ gen_traverse(PyObject *self, visitproc visit, void *arg) } void -_PyGen_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +_PyGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { PyGenObject *gen = (PyGenObject *)op; if (gen->gi_frame_state == FRAME_CLEARED) { return; } - _PyGC_MoveToReachable(gen->gi_exc_state.exc_value, reachable, visited_space); + _PyGC_MoveUnvisited(gen->gi_exc_state.exc_value, to, visited_space); if (gen->gi_frame_state == FRAME_EXECUTING) { /* if executing we already traversed it on the stack */ return; } _PyInterpreterFrame *frame = &gen->gi_iframe; - _PyFrame_MoveToReachable(frame, reachable, visited_space); + _PyFrame_MoveUnvisited(frame, to, visited_space); } void @@ -1443,11 +1443,11 @@ typedef struct _PyAsyncGenWrappedValue { void -_PyAsyncGen_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +_PyAsyncGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { PyAsyncGenObject *ag = _PyAsyncGenObject_CAST(op); - _PyGC_MoveToReachable(ag->ag_origin_or_finalizer, reachable, visited_space); - _PyGen_MoveToReachable(op, reachable, visited_space); + _PyGC_MoveUnvisited(ag->ag_origin_or_finalizer, to, visited_space); + _PyGen_MoveUnvisited(op, to, visited_space); } static int @@ -1733,11 +1733,11 @@ async_gen_asend_dealloc(PyObject *self) } void -_PyAsyncAsend_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +_PyAsyncAsend_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { PyAsyncGenASend *ags = _PyAsyncGenASend_CAST(op); - _PyGC_MoveToReachable((PyObject *)ags->ags_sendval, reachable, visited_space); - _PyAsyncGen_MoveToReachable((PyObject *)ags->ags_gen, reachable, visited_space); + _PyGC_MoveUnvisited((PyObject *)ags->ags_sendval, to, visited_space); + _PyAsyncGen_MoveUnvisited((PyObject *)ags->ags_gen, to, visited_space); } static int diff --git a/Objects/listobject.c b/Objects/listobject.c index dbadfaa056c9de..a1d7598998b801 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ 
-3277,13 +3277,13 @@ list_remove_impl(PyListObject *self, PyObject *value) } void -_PyList_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +_PyList_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { PyListObject *o = (PyListObject *)op; Py_ssize_t i; for (i = Py_SIZE(o); --i >= 0; ) { PyObject *item = o->ob_item[i]; - _PyGC_MoveToReachable(item, reachable, visited_space); + _PyGC_MoveUnvisited(item, to, visited_space); } } diff --git a/Objects/setobject.c b/Objects/setobject.c index 4404ed3b8cc37b..05272be07135ad 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -701,13 +701,13 @@ set_traverse(PyObject *self, visitproc visit, void *arg) } void -_PySet_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +_PySet_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { PySetObject *so = (PySetObject *)op; Py_ssize_t pos = 0; setentry *entry; while (set_next(so, &pos, &entry)) { - _PyGC_MoveToReachable(entry->key, reachable, visited_space); + _PyGC_MoveUnvisited(entry->key, to, visited_space); } } diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index b108ed582d8fc0..f8be03b3e03035 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -624,12 +624,12 @@ tuple_count(PyTupleObject *self, PyObject *value) } void -_PyTuple_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +_PyTuple_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { PyTupleObject *o = (PyTupleObject *)op; for (Py_ssize_t i = Py_SIZE(o); --i >= 0; ) { PyObject *item = o->ob_item[i]; - _PyGC_MoveToReachable(item, reachable, visited_space); + _PyGC_MoveUnvisited(item, to, visited_space); } } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index ee8629d59fe555..cf0209a5e22d40 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6216,15 +6216,15 @@ PyDoc_STRVAR(type_doc, void -_PyType_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +_PyType_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { PyTypeObject *type = (PyTypeObject *)op; - _PyGC_MoveToReachable(type->tp_dict, reachable, visited_space); - _PyGC_MoveToReachable(type->tp_cache, reachable, visited_space); - _PyGC_MoveToReachable(type->tp_mro, reachable, visited_space); - _PyGC_MoveToReachable(type->tp_bases, reachable, visited_space); - _PyGC_MoveToReachable((PyObject *)type->tp_base, reachable, visited_space); - _PyGC_MoveToReachable(((PyHeapTypeObject *)type)->ht_module, reachable, visited_space); + _PyGC_MoveUnvisited(type->tp_dict, to, visited_space); + _PyGC_MoveUnvisited(type->tp_cache, to, visited_space); + _PyGC_MoveUnvisited(type->tp_mro, to, visited_space); + _PyGC_MoveUnvisited(type->tp_bases, to, visited_space); + _PyGC_MoveUnvisited((PyObject *)type->tp_base, to, visited_space); + _PyGC_MoveUnvisited(((PyHeapTypeObject *)type)->ht_module, to, visited_space); } static int diff --git a/Python/gc.c b/Python/gc.c index 1f85c9b7708cd0..15c23ba40ed642 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1419,14 +1419,14 @@ completed_cycle(GCState *gcstate) } void -_PyGC_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) +_PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op) && gc_old_space(gc) != visited_space) { gc_flip_old_space(gc); - gc_list_move(gc, reachable); + gc_list_move(gc, to); } } } @@ -1434,13 +1434,13 @@ 
_PyGC_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space) /* TO DO -- Move this into pycore_stackref.h */ void -_PyFrame_MoveToReachable(_PyInterpreterFrame *frame, PyGC_Head *reachable, int visited_space) +_PyFrame_MoveUnvisited(_PyInterpreterFrame *frame, PyGC_Head *to, int visited_space) { _PyStackRef *locals = frame->localsplus; _PyStackRef *sp = frame->stackpointer; - _PyGC_MoveToReachable(frame->f_locals, reachable, visited_space); + _PyGC_MoveUnvisited(frame->f_locals, to, visited_space); PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - _PyGC_MoveToReachable(func, reachable, visited_space); + _PyGC_MoveUnvisited(func, to, visited_space); while (sp > locals) { sp--; _PyStackRef ref = *sp; @@ -1451,25 +1451,25 @@ _PyFrame_MoveToReachable(_PyInterpreterFrame *frame, PyGC_Head *reachable, int v if (_PyObject_GC_IS_TRACKED(op) && gc_old_space(gc) != visited_space) { gc_flip_old_space(gc); - gc_list_move(gc, reachable); + gc_list_move(gc, to); } } } } } -extern void _PySet_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); -extern void _PyDict_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); -extern void _PyGen_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); -extern void _PyAsyncGen_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); -extern void _PyAsyncAsend_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); -extern void _PyList_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); -extern void _PyTuple_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); -extern void _PyType_MoveToReachable(PyObject *op, PyGC_Head *reachable, int visited_space); +extern void _PySet_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); +extern void _PyDict_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); +extern void _PyGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); +extern void _PyAsyncGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); +extern void _PyAsyncAsend_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); +extern void _PyList_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); +extern void _PyTuple_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); +extern void _PyType_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); static Py_ssize_t -mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +move_all_transitively_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) { // Transitively traverse all objects from reachable, until empty struct container_and_flag arg = { @@ -1486,12 +1486,19 @@ mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) objects_marked++; PyObject *op = FROM_GC(gc); assert(PyObject_IS_GC(op)); + assert(_PyObject_GC_IS_TRACKED(op)); + if (_Py_IsImmortal(op)) { + PyGC_Head *prev = _PyGCHead_PREV(gc); + gc_list_move(gc, &get_gc_state()->permanent_generation.head); + gc = prev; + continue; + } switch(Py_TYPE(op)->tp_version_tag) { case _Py_TYPE_VERSION_LIST: - _PyList_MoveToReachable(op, reachable, visited_space); + _PyList_MoveUnvisited(op, reachable, visited_space); break; case _Py_TYPE_VERSION_TUPLE: - _PyTuple_MoveToReachable(op, reachable, visited_space); + _PyTuple_MoveUnvisited(op, reachable, visited_space); break; case _Py_TYPE_VERSION_MODULE: { @@ -1507,24 +1514,24 @@ mark_all_reachable(PyGC_Head *reachable, PyGC_Head 
*visited, int visited_space) } /* fall through */ case _Py_TYPE_VERSION_DICT: - _PyDict_MoveToReachable(op, reachable, visited_space); + _PyDict_MoveUnvisited(op, reachable, visited_space); break; case _Py_TYPE_VERSION_SET: case _Py_TYPE_VERSION_FROZEN_SET: - _PySet_MoveToReachable(op, reachable, visited_space); + _PySet_MoveUnvisited(op, reachable, visited_space); break; case _Py_TYPE_VERSION_ASYNC_GENERATOR: - _PyAsyncGen_MoveToReachable(op, reachable, visited_space); + _PyAsyncGen_MoveUnvisited(op, reachable, visited_space); break; case _Py_TYPE_VERSION_ASYNC_ASEND: - _PyAsyncAsend_MoveToReachable(op, reachable, visited_space); + _PyAsyncAsend_MoveUnvisited(op, reachable, visited_space); break; case _Py_TYPE_VERSION_COROUTINE: case _Py_TYPE_VERSION_GENERATOR: - _PyGen_MoveToReachable(op, reachable, visited_space); + _PyGen_MoveUnvisited(op, reachable, visited_space); break; case _Py_TYPE_VERSION_TYPE: - _PyType_MoveToReachable(op, reachable, visited_space); + _PyType_MoveUnvisited(op, reachable, visited_space); break; default: { @@ -1543,19 +1550,19 @@ mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_sp PyGC_Head reachable; gc_list_init(&reachable); Py_ssize_t objects_marked = 0; - _PyGC_MoveToReachable(interp->sysdict, &reachable, visited_space); - _PyGC_MoveToReachable(interp->builtins, &reachable, visited_space); - _PyGC_MoveToReachable(interp->dict, &reachable, visited_space); + _PyGC_MoveUnvisited(interp->sysdict, &reachable, visited_space); + _PyGC_MoveUnvisited(interp->builtins, &reachable, visited_space); + _PyGC_MoveUnvisited(interp->dict, &reachable, visited_space); struct types_state *types = &interp->types; for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { - _PyGC_MoveToReachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); - _PyGC_MoveToReachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + _PyGC_MoveUnvisited(types->builtins.initialized[i].tp_dict, &reachable, visited_space); + _PyGC_MoveUnvisited(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); } for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { - _PyGC_MoveToReachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); - _PyGC_MoveToReachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + _PyGC_MoveUnvisited(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); + _PyGC_MoveUnvisited(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); + objects_marked += move_all_transitively_reachable(&reachable, visited, visited_space); assert(gc_list_is_empty(&reachable)); return objects_marked + 20; } @@ -1577,7 +1584,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b frame = frame->previous; continue; } - _PyFrame_MoveToReachable(frame, &reachable, visited_space); + _PyFrame_MoveUnvisited(frame, &reachable, visited_space); if (!start && frame->visited) { // If this frame has already been visited, then the lower frames // will have already been visited and will not have changed @@ -1590,7 +1597,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b ts = PyThreadState_Next(ts); HEAD_UNLOCK(runtime); } - Py_ssize_t objects_marked = mark_all_reachable(&reachable, visited, visited_space); + Py_ssize_t objects_marked = move_all_transitively_reachable(&reachable, 
visited, visited_space); assert(gc_list_is_empty(&reachable)); return objects_marked; } @@ -1662,12 +1669,12 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - PyGC_Head increment; - gc_list_init(&increment); Py_ssize_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); gcstate->work_to_do -= objects_marked; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); + PyGC_Head increment; + gc_list_init(&increment); gc_list_merge(&gcstate->young.head, &increment); gc_list_validate_space(&increment, gcstate->visited_space); Py_ssize_t increment_size = 0; From b1c7ab03cbfa590d9bfd344606bf9a19ccac6f84 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 13 Nov 2024 19:07:59 +0000 Subject: [PATCH 06/23] Remove expand_region_transitively_reachable and use move_all_transitively_reachable. --- Python/gc.c | 44 ++++++++------------------------------------ 1 file changed, 8 insertions(+), 36 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 15c23ba40ed642..dcc14180da61df 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1369,36 +1369,6 @@ visit_add_to_container(PyObject *op, void *arg) return 0; } -static Py_ssize_t -expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) -{ - struct container_and_flag arg = { - .container = container, - .visited_space = gcstate->visited_space, - .size = 0 - }; - assert(GC_NEXT(gc) == container); - while (gc != container) { - /* Survivors will be moved to visited space, so they should - * have been marked as visited */ - assert(IS_IN_VISITED(gc, gcstate->visited_space)); - PyObject *op = FROM_GC(gc); - assert(_PyObject_GC_IS_TRACKED(op)); - if (_Py_IsImmortal(op)) { - PyGC_Head *next = GC_NEXT(gc); - gc_list_move(gc, &get_gc_state()->permanent_generation.head); - gc = next; - continue; - } - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); - gc = GC_NEXT(gc); - } - return arg.size; -} - /* Do bookkeeping for a completed GC cycle */ static void completed_cycle(GCState *gcstate) @@ -1611,7 +1581,6 @@ mark_at_start(PyThreadState *tstate) PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); - gcstate->work_to_do -= objects_marked; gcstate->phase = GC_PHASE_COLLECT; return objects_marked; } @@ -1675,19 +1644,22 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_set_space(&gcstate->young.head, gcstate->visited_space); PyGC_Head increment; gc_list_init(&increment); - gc_list_merge(&gcstate->young.head, &increment); + PyGC_Head working; + gc_list_init(&working); + gc_list_merge(&gcstate->young.head, &working); gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = 0; + Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space); + assert(gc_list_is_empty(&working)); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; } PyGC_Head *gc = _PyGCHead_NEXT(not_visited); - gc_list_move(gc, &increment); - increment_size++; + gc_list_move(gc, &working); 
assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); - increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); + increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space); + assert(gc_list_is_empty(&working)); } GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); gc_list_validate_space(&increment, gcstate->visited_space); From 51ff78e646b2f5d17a00464286e7145473d40041 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 2 Dec 2024 16:07:53 +0000 Subject: [PATCH 07/23] Fix compiler warnings and linkage --- Include/internal/pycore_gc.h | 2 +- Python/gc.c | 15 +++------------ 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 89bc77b8c8bd50..fb7e3231737258 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -382,7 +382,7 @@ extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp, gcvisitobjects_t callback, void *arg); #endif -void _PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); +PyAPI_FUNC(void) _PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); #ifdef __cplusplus } diff --git a/Python/gc.c b/Python/gc.c index 61d21d932140f6..7036a06738fb72 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1330,15 +1330,6 @@ gc_collect_young(PyThreadState *tstate, validate_spaces(gcstate); } -#ifndef NDEBUG -static inline int -IS_IN_VISITED(PyGC_Head *gc, int visited_space) -{ - assert(visited_space == 0 || other_space(visited_space) == 0); - return gc_old_space(gc) == visited_space; -} -#endif - struct container_and_flag { PyGC_Head *container; int visited_space; @@ -1407,8 +1398,6 @@ _PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) } } -/* TO DO -- Move this into pycore_stackref.h */ - void _PyFrame_MoveUnvisited(_PyInterpreterFrame *frame, PyGC_Head *to, int visited_space) { @@ -1486,12 +1475,13 @@ move_all_transitively_reachable(PyGC_Head *reachable, PyGC_Head *visited, int vi } op = m->md_dict; assert (op != NULL); + /* fall through */ } - /* fall through */ case _Py_TYPE_VERSION_DICT: _PyDict_MoveUnvisited(op, reachable, visited_space); break; case _Py_TYPE_VERSION_SET: + /* fall through */ case _Py_TYPE_VERSION_FROZEN_SET: _PySet_MoveUnvisited(op, reachable, visited_space); break; @@ -1502,6 +1492,7 @@ move_all_transitively_reachable(PyGC_Head *reachable, PyGC_Head *visited, int vi _PyAsyncAsend_MoveUnvisited(op, reachable, visited_space); break; case _Py_TYPE_VERSION_COROUTINE: + /* fall through */ case _Py_TYPE_VERSION_GENERATOR: _PyGen_MoveUnvisited(op, reachable, visited_space); break; From df907b5b777918bf7233ee310a6d97aa3dd86ff2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 2 Dec 2024 16:12:46 +0000 Subject: [PATCH 08/23] Fix another linkage issue --- Include/internal/pycore_frame.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 3fbdb68ac543c3..f7ca390c5e5ad7 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -409,7 +409,7 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func, size_t argcount, PyObject *kwnames, _PyInterpreterFrame *previous); -void +PyAPI_FUNC(void) _PyFrame_MoveUnvisited(_PyInterpreterFrame *frame, PyGC_Head *to, int visited_space); #ifdef __cplusplus From 9ca64f55e63e8425fc1075f296ce7dc63918fc62 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 2 
Dec 2024 16:33:20 +0000 Subject: [PATCH 09/23] Try 'extern' --- Include/internal/pycore_frame.h | 2 +- Include/internal/pycore_gc.h | 2 +- Python/gc.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index f7ca390c5e5ad7..eb802d4fe4dcb7 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -409,7 +409,7 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func, size_t argcount, PyObject *kwnames, _PyInterpreterFrame *previous); -PyAPI_FUNC(void) +extern void _PyFrame_MoveUnvisited(_PyInterpreterFrame *frame, PyGC_Head *to, int visited_space); #ifdef __cplusplus diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index fb7e3231737258..c4f9bcc9771bed 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -382,7 +382,7 @@ extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp, gcvisitobjects_t callback, void *arg); #endif -PyAPI_FUNC(void) _PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); +extern void _PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); #ifdef __cplusplus } diff --git a/Python/gc.c b/Python/gc.c index 7036a06738fb72..4a2decdba06789 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1475,8 +1475,8 @@ move_all_transitively_reachable(PyGC_Head *reachable, PyGC_Head *visited, int vi } op = m->md_dict; assert (op != NULL); - /* fall through */ } + /* fall through */ case _Py_TYPE_VERSION_DICT: _PyDict_MoveUnvisited(op, reachable, visited_space); break; From bda13f44a28ea175ce6d3fea7f5118007f1298e7 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 2 Dec 2024 16:49:20 +0000 Subject: [PATCH 10/23] Go back to PyAPI_FUNC and move functions together --- Include/internal/pycore_frame.h | 2 +- Include/internal/pycore_gc.h | 3 ++- Python/gc.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index eb802d4fe4dcb7..f7ca390c5e5ad7 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -409,7 +409,7 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func, size_t argcount, PyObject *kwnames, _PyInterpreterFrame *previous); -extern void +PyAPI_FUNC(void) _PyFrame_MoveUnvisited(_PyInterpreterFrame *frame, PyGC_Head *to, int visited_space); #ifdef __cplusplus diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index c4f9bcc9771bed..3d6317dcc54548 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -382,7 +382,8 @@ extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp, gcvisitobjects_t callback, void *arg); #endif -extern void _PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); +PyAPI_FUNC(void) _PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); +PyAPI_FUNC(void) _PyFrame_MoveUnvisited(_PyInterpreterFrame *frame, PyGC_Head *to, int visited_space); #ifdef __cplusplus } diff --git a/Python/gc.c b/Python/gc.c index 4a2decdba06789..7036a06738fb72 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1475,8 +1475,8 @@ move_all_transitively_reachable(PyGC_Head *reachable, PyGC_Head *visited, int vi } op = m->md_dict; assert (op != NULL); - } /* fall through */ + } case _Py_TYPE_VERSION_DICT: _PyDict_MoveUnvisited(op, reachable, visited_space); break; From d9d63c8c249d306f3a0892278461985a0bf7061c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 2 Dec 2024 16:52:50 
+0000 Subject: [PATCH 11/23] Use _Py_FALLTHROUGH --- Include/internal/pycore_gc.h | 1 - Python/gc.c | 8 +++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 3d6317dcc54548..fb7e3231737258 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -383,7 +383,6 @@ extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp, #endif PyAPI_FUNC(void) _PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); -PyAPI_FUNC(void) _PyFrame_MoveUnvisited(_PyInterpreterFrame *frame, PyGC_Head *to, int visited_space); #ifdef __cplusplus } diff --git a/Python/gc.c b/Python/gc.c index 7036a06738fb72..fcda77de42b724 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1475,13 +1475,12 @@ move_all_transitively_reachable(PyGC_Head *reachable, PyGC_Head *visited, int vi } op = m->md_dict; assert (op != NULL); - /* fall through */ + _Py_FALLTHROUGH; } case _Py_TYPE_VERSION_DICT: _PyDict_MoveUnvisited(op, reachable, visited_space); break; - case _Py_TYPE_VERSION_SET: - /* fall through */ + case _Py_TYPE_VERSION_SET: _Py_FALLTHROUGH; case _Py_TYPE_VERSION_FROZEN_SET: _PySet_MoveUnvisited(op, reachable, visited_space); break; @@ -1491,8 +1490,7 @@ move_all_transitively_reachable(PyGC_Head *reachable, PyGC_Head *visited, int vi case _Py_TYPE_VERSION_ASYNC_ASEND: _PyAsyncAsend_MoveUnvisited(op, reachable, visited_space); break; - case _Py_TYPE_VERSION_COROUTINE: - /* fall through */ + case _Py_TYPE_VERSION_COROUTINE: _Py_FALLTHROUGH; case _Py_TYPE_VERSION_GENERATOR: _PyGen_MoveUnvisited(op, reachable, visited_space); break; From 57b8820bd054d74435283277bd81c11e7a446ce5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 2 Dec 2024 17:41:15 +0000 Subject: [PATCH 12/23] Add necessary #ifndef Py_GIL_DISABLED --- Objects/dictobject.c | 2 ++ Objects/genobject.c | 7 ++++++- Objects/listobject.c | 2 ++ Objects/setobject.c | 2 ++ Objects/tupleobject.c | 2 ++ Objects/typeobject.c | 3 ++- 6 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 618251221e254a..e32ce2a34863e5 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -4494,6 +4494,7 @@ dict_popitem_impl(PyDictObject *self) return res; } +#ifndef Py_GIL_DISABLED void _PyDict_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { @@ -4529,6 +4530,7 @@ _PyDict_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) } } } +#endif static int dict_traverse(PyObject *op, visitproc visit, void *arg) diff --git a/Objects/genobject.c b/Objects/genobject.c index b570712d3171dc..85b7811f377f5e 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -78,6 +78,7 @@ gen_traverse(PyObject *self, visitproc visit, void *arg) return 0; } +#ifndef Py_GIL_DISABLED void _PyGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { @@ -93,6 +94,7 @@ _PyGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) _PyInterpreterFrame *frame = &gen->gi_iframe; _PyFrame_MoveUnvisited(frame, to, visited_space); } +#endif void _PyGen_Finalize(PyObject *self) @@ -1447,7 +1449,7 @@ typedef struct _PyAsyncGenWrappedValue { (assert(_PyAsyncGenWrappedValue_CheckExact(op)), \ _Py_CAST(_PyAsyncGenWrappedValue*, (op))) - +#ifndef Py_GIL_DISABLED void _PyAsyncGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { @@ -1455,6 +1457,7 @@ _PyAsyncGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) _PyGC_MoveUnvisited(ag->ag_origin_or_finalizer, to, 
visited_space); _PyGen_MoveUnvisited(op, to, visited_space); } +#endif static int async_gen_traverse(PyObject *self, visitproc visit, void *arg) @@ -1738,6 +1741,7 @@ async_gen_asend_dealloc(PyObject *self) _Py_FREELIST_FREE(async_gen_asends, self, PyObject_GC_Del); } +#ifndef Py_GIL_DISABLED void _PyAsyncAsend_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { @@ -1745,6 +1749,7 @@ _PyAsyncAsend_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) _PyGC_MoveUnvisited((PyObject *)ags->ags_sendval, to, visited_space); _PyAsyncGen_MoveUnvisited((PyObject *)ags->ags_gen, to, visited_space); } +#endif static int async_gen_asend_traverse(PyObject *self, visitproc visit, void *arg) diff --git a/Objects/listobject.c b/Objects/listobject.c index e190899fbcc5e7..d0fcdc3e489510 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3282,6 +3282,7 @@ list_remove_impl(PyListObject *self, PyObject *value) return NULL; } +#ifndef Py_GIL_DISABLED void _PyList_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { @@ -3292,6 +3293,7 @@ _PyList_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) _PyGC_MoveUnvisited(item, to, visited_space); } } +#endif static int list_traverse(PyObject *self, visitproc visit, void *arg) diff --git a/Objects/setobject.c b/Objects/setobject.c index 05272be07135ad..61f572e61a29bc 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -700,6 +700,7 @@ set_traverse(PyObject *self, visitproc visit, void *arg) return 0; } +#ifndef Py_GIL_DISABLED void _PySet_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { @@ -710,6 +711,7 @@ _PySet_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) _PyGC_MoveUnvisited(entry->key, to, visited_space); } } +#endif /* Work to increase the bit dispersion for closely spaced hash values. 
This is important because some use cases have many combinations of a diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 9361fdd810ed85..f05e3bb7407fcc 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -623,6 +623,7 @@ tuple_count(PyTupleObject *self, PyObject *value) return PyLong_FromSsize_t(count); } +#ifndef Py_GIL_DISABLED void _PyTuple_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { @@ -632,6 +633,7 @@ _PyTuple_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) _PyGC_MoveUnvisited(item, to, visited_space); } } +#endif static int tuple_traverse(PyObject *self, visitproc visit, void *arg) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 96f931ad95f558..2dcaf6e2f48413 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6232,7 +6232,7 @@ PyDoc_STRVAR(type_doc, "type(object) -> the object's type\n" "type(name, bases, dict, **kwds) -> a new type"); - +#ifndef Py_GIL_DISABLED void _PyType_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) { @@ -6244,6 +6244,7 @@ _PyType_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) _PyGC_MoveUnvisited((PyObject *)type->tp_base, to, visited_space); _PyGC_MoveUnvisited(((PyHeapTypeObject *)type)->ht_module, to, visited_space); } +#endif static int type_traverse(PyObject *self, visitproc visit, void *arg) From a607059498f74138315ad6d628d9808fedd723b8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 3 Dec 2024 11:44:46 +0000 Subject: [PATCH 13/23] Go back to using tp_traverse, but make traversal more efficient --- Include/internal/pycore_frame.h | 3 - Objects/dictobject.c | 42 +----- Objects/genobject.c | 38 ------ Objects/listobject.c | 13 -- Objects/setobject.c | 13 -- Objects/tupleobject.c | 12 -- Objects/typeobject.c | 27 ++-- Python/gc.c | 229 ++++++++++++++------------------ 8 files changed, 112 insertions(+), 265 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index f7ca390c5e5ad7..b786c5f49e9831 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -409,9 +409,6 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func, size_t argcount, PyObject *kwnames, _PyInterpreterFrame *previous); -PyAPI_FUNC(void) -_PyFrame_MoveUnvisited(_PyInterpreterFrame *frame, PyGC_Head *to, int visited_space); - #ifdef __cplusplus } #endif diff --git a/Objects/dictobject.c b/Objects/dictobject.c index e32ce2a34863e5..51c1f69a7be4d2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -4494,44 +4494,6 @@ dict_popitem_impl(PyDictObject *self) return res; } -#ifndef Py_GIL_DISABLED -void -_PyDict_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) -{ - PyDictObject *mp = (PyDictObject *)op; - PyDictKeysObject *keys = mp->ma_keys; - Py_ssize_t i, n = keys->dk_nentries; - if (DK_IS_UNICODE(keys)) { - if (_PyDict_HasSplitTable(mp)) { - if (!mp->ma_values->embedded) { - for (i = 0; i < n; i++) { - PyObject *value = mp->ma_values->values[i]; - _PyGC_MoveUnvisited(value, to, visited_space); - } - } - } - else { - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); - for (i = 0; i < n; i++) { - PyObject *value = entries[i].me_value; - _PyGC_MoveUnvisited(value, to, visited_space); - } - } - } - else { - PyDictKeyEntry *entries = DK_ENTRIES(keys); - for (i = 0; i < n; i++) { - if (entries[i].me_value != NULL) { - PyObject *key = entries[i].me_key; - _PyGC_MoveUnvisited(key, to, visited_space); - PyObject *value = entries[i].me_value; - _PyGC_MoveUnvisited(value, 
to, visited_space); - } - } - } -} -#endif - static int dict_traverse(PyObject *op, visitproc visit, void *arg) { @@ -7105,9 +7067,7 @@ int PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg) { PyTypeObject *tp = Py_TYPE(obj); - if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) { - return 0; - } + assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictValues *values = _PyObject_InlineValues(obj); if (values->valid) { diff --git a/Objects/genobject.c b/Objects/genobject.c index 85b7811f377f5e..d8abb3d46f83cc 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -78,24 +78,6 @@ gen_traverse(PyObject *self, visitproc visit, void *arg) return 0; } -#ifndef Py_GIL_DISABLED -void -_PyGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) -{ - PyGenObject *gen = (PyGenObject *)op; - if (gen->gi_frame_state == FRAME_CLEARED) { - return; - } - _PyGC_MoveUnvisited(gen->gi_exc_state.exc_value, to, visited_space); - if (gen->gi_frame_state == FRAME_EXECUTING) { - /* if executing we already traversed it on the stack */ - return; - } - _PyInterpreterFrame *frame = &gen->gi_iframe; - _PyFrame_MoveUnvisited(frame, to, visited_space); -} -#endif - void _PyGen_Finalize(PyObject *self) { @@ -1449,16 +1431,6 @@ typedef struct _PyAsyncGenWrappedValue { (assert(_PyAsyncGenWrappedValue_CheckExact(op)), \ _Py_CAST(_PyAsyncGenWrappedValue*, (op))) -#ifndef Py_GIL_DISABLED -void -_PyAsyncGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) -{ - PyAsyncGenObject *ag = _PyAsyncGenObject_CAST(op); - _PyGC_MoveUnvisited(ag->ag_origin_or_finalizer, to, visited_space); - _PyGen_MoveUnvisited(op, to, visited_space); -} -#endif - static int async_gen_traverse(PyObject *self, visitproc visit, void *arg) { @@ -1741,16 +1713,6 @@ async_gen_asend_dealloc(PyObject *self) _Py_FREELIST_FREE(async_gen_asends, self, PyObject_GC_Del); } -#ifndef Py_GIL_DISABLED -void -_PyAsyncAsend_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) -{ - PyAsyncGenASend *ags = _PyAsyncGenASend_CAST(op); - _PyGC_MoveUnvisited((PyObject *)ags->ags_sendval, to, visited_space); - _PyAsyncGen_MoveUnvisited((PyObject *)ags->ags_gen, to, visited_space); -} -#endif - static int async_gen_asend_traverse(PyObject *self, visitproc visit, void *arg) { diff --git a/Objects/listobject.c b/Objects/listobject.c index d0fcdc3e489510..4b24f4a428e18b 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3282,19 +3282,6 @@ list_remove_impl(PyListObject *self, PyObject *value) return NULL; } -#ifndef Py_GIL_DISABLED -void -_PyList_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) -{ - PyListObject *o = (PyListObject *)op; - Py_ssize_t i; - for (i = Py_SIZE(o); --i >= 0; ) { - PyObject *item = o->ob_item[i]; - _PyGC_MoveUnvisited(item, to, visited_space); - } -} -#endif - static int list_traverse(PyObject *self, visitproc visit, void *arg) { diff --git a/Objects/setobject.c b/Objects/setobject.c index 61f572e61a29bc..955ccbebf74b54 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -700,19 +700,6 @@ set_traverse(PyObject *self, visitproc visit, void *arg) return 0; } -#ifndef Py_GIL_DISABLED -void -_PySet_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) -{ - PySetObject *so = (PySetObject *)op; - Py_ssize_t pos = 0; - setentry *entry; - while (set_next(so, &pos, &entry)) { - _PyGC_MoveUnvisited(entry->key, to, visited_space); - } -} -#endif - /* Work to increase the bit dispersion for closely spaced hash values. 
This is important because some use cases have many combinations of a small number of elements with nearby hashes so that many distinct diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index f05e3bb7407fcc..49977726eadca9 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -623,18 +623,6 @@ tuple_count(PyTupleObject *self, PyObject *value) return PyLong_FromSsize_t(count); } -#ifndef Py_GIL_DISABLED -void -_PyTuple_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) -{ - PyTupleObject *o = (PyTupleObject *)op; - for (Py_ssize_t i = Py_SIZE(o); --i >= 0; ) { - PyObject *item = o->ob_item[i]; - _PyGC_MoveUnvisited(item, to, visited_space); - } -} -#endif - static int tuple_traverse(PyObject *self, visitproc visit, void *arg) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2dcaf6e2f48413..7d3cb41bec065e 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2354,6 +2354,16 @@ subtype_traverse(PyObject *self, visitproc visit, void *arg) return 0; } + +static int +plain_object_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyTypeObject *type = Py_TYPE(self); + assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT); + Py_VISIT(type); + return PyObject_VisitManagedDict(self, visit, arg); +} + static void clear_slots(PyTypeObject *type, PyObject *self) { @@ -4146,6 +4156,10 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type) assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); type->tp_flags |= Py_TPFLAGS_MANAGED_DICT; type->tp_dictoffset = -1; + + if (type->tp_basicsize == sizeof(PyObject *)) { + type->tp_traverse = plain_object_traverse; + } } type->tp_basicsize = slotoffset; @@ -6232,19 +6246,6 @@ PyDoc_STRVAR(type_doc, "type(object) -> the object's type\n" "type(name, bases, dict, **kwds) -> a new type"); -#ifndef Py_GIL_DISABLED -void -_PyType_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) -{ - PyTypeObject *type = (PyTypeObject *)op; - _PyGC_MoveUnvisited(type->tp_dict, to, visited_space); - _PyGC_MoveUnvisited(type->tp_cache, to, visited_space); - _PyGC_MoveUnvisited(type->tp_mro, to, visited_space); - _PyGC_MoveUnvisited(type->tp_bases, to, visited_space); - _PyGC_MoveUnvisited((PyObject *)type->tp_base, to, visited_space); - _PyGC_MoveUnvisited(((PyHeapTypeObject *)type)->ht_module, to, visited_space); -} -#endif static int type_traverse(PyObject *self, visitproc visit, void *arg) diff --git a/Python/gc.c b/Python/gc.c index fcda77de42b724..43703b1e4f37ce 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1330,29 +1330,67 @@ gc_collect_young(PyThreadState *tstate, validate_spaces(gcstate); } -struct container_and_flag { - PyGC_Head *container; +typedef struct work_stack { + PyGC_Head *top; int visited_space; - intptr_t size; -}; +} WorkStack; -/* A traversal callback for adding to container) */ -static int -visit_add_to_container(PyObject *op, void *arg) +static inline void +push_to_stack(PyGC_Head *gc, WorkStack *stack) { - OBJECT_STAT_INC(object_visits); - struct container_and_flag *cf = (struct container_and_flag *)arg; - int visited = cf->visited_space; - assert(visited == get_gc_state()->visited_space); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *prev = GC_PREV(gc); + PyGC_Head *next = GC_NEXT(gc); + + _PyGCHead_SET_NEXT(prev, next); + _PyGCHead_SET_PREV(next, prev); + _PyGCHead_SET_PREV(gc, stack->top); + stack->top = gc; +} + +/* append list `from` to `stack`; `from` becomes an empty list */ +static void +move_list_to_stack(PyGC_Head *from, WorkStack *stack) +{ + if 
(!gc_list_is_empty(from)) { + PyGC_Head *from_head = GC_NEXT(from); + PyGC_Head *from_tail = GC_PREV(from); + _PyGCHead_SET_PREV(from_head, stack->top); + stack->top = from_tail; + gc_list_init(from); + } +} + +static inline void +move_to_stack(PyObject *op, WorkStack *stack, int visited_space) +{ + assert(op != NULL); + if (_PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited) { + gc_old_space(gc) != visited_space) { + assert(!_Py_IsImmortal(op)); gc_flip_old_space(gc); - gc_list_move(gc, cf->container); - cf->size++; + push_to_stack(gc, stack); } } +} + +static void +move_unvisited(PyObject *op, WorkStack *stack, int visited_space) +{ + move_to_stack(op, stack, visited_space); +} + +#define MOVE_UNVISITED(O, T, V) if ((O) != NULL) move_unvisited((O), (T), (V)) + +/* A traversal callback for adding to container */ +static int +visit_add_to_container(PyObject *op, void *arg) +{ + OBJECT_STAT_INC(object_visits); + WorkStack *stack = (WorkStack *)arg; + assert(stack->visited_space == get_gc_state()->visited_space); + move_to_stack(op, stack, stack->visited_space); return 0; } @@ -1385,123 +1423,47 @@ completed_scavenge(GCState *gcstate) gcstate->phase = GC_PHASE_MARK; } -void -_PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space) -{ - if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - gc_list_move(gc, to); - } - } -} - -void -_PyFrame_MoveUnvisited(_PyInterpreterFrame *frame, PyGC_Head *to, int visited_space) +static void +frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_space) { _PyStackRef *locals = frame->localsplus; _PyStackRef *sp = frame->stackpointer; - _PyGC_MoveUnvisited(frame->f_locals, to, visited_space); + if (frame->f_locals != NULL) { + move_unvisited(frame->f_locals, stack, visited_space); + } PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - _PyGC_MoveUnvisited(func, to, visited_space); + move_unvisited(func, stack, visited_space); while (sp > locals) { sp--; _PyStackRef ref = *sp; if (PyStackRef_IsNonNullMortal(ref)) { PyObject *op = PyStackRef_AsPyObjectBorrow(ref); - if (_PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - gc_list_move(gc, to); - } - } + move_unvisited(op, stack, visited_space); } } } -extern void _PySet_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); -extern void _PyDict_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); -extern void _PyGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); -extern void _PyAsyncGen_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); -extern void _PyAsyncAsend_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); -extern void _PyList_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); -extern void _PyTuple_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); -extern void _PyType_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); - static Py_ssize_t -move_all_transitively_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visited_space) { // Transitively traverse all objects from reachable, until empty - struct container_and_flag arg = { - .container = reachable, - 
.visited_space = visited_space, - .size = 0 - }; Py_ssize_t objects_marked = 0; - while (!gc_list_is_empty(reachable)) { - /* Pop from the back of the list, to do depth first traversal */ - PyGC_Head *gc = _PyGCHead_PREV(reachable); + while (stack->top != NULL) { + /* Pop from the stack, to do depth first traversal */ + PyGC_Head *gc = stack->top; + stack->top = _PyGCHead_PREV(gc); assert(gc_old_space(gc) == visited_space); - gc_list_move(gc, visited); + gc_list_append(gc, visited); objects_marked++; PyObject *op = FROM_GC(gc); assert(PyObject_IS_GC(op)); assert(_PyObject_GC_IS_TRACKED(op)); if (_Py_IsImmortal(op)) { - PyGC_Head *prev = _PyGCHead_PREV(gc); - gc_list_move(gc, &get_gc_state()->permanent_generation.head); - gc = prev; - continue; + _PyObject_GC_UNTRACK(op); } - switch(Py_TYPE(op)->tp_version_tag) { - case _Py_TYPE_VERSION_LIST: - _PyList_MoveUnvisited(op, reachable, visited_space); - break; - case _Py_TYPE_VERSION_TUPLE: - _PyTuple_MoveUnvisited(op, reachable, visited_space); - break; - case _Py_TYPE_VERSION_MODULE: - { - PyModuleObject *m = (PyModuleObject *)op; - /* bpo-39824: Don't call m_traverse() if m_size > 0 and md_state=NULL */ - if (m->md_def && m->md_def->m_traverse - && (m->md_def->m_size <= 0 || m->md_state != NULL)) - { - m->md_def->m_traverse(op, visit_add_to_container, &arg); - } - op = m->md_dict; - assert (op != NULL); - _Py_FALLTHROUGH; - } - case _Py_TYPE_VERSION_DICT: - _PyDict_MoveUnvisited(op, reachable, visited_space); - break; - case _Py_TYPE_VERSION_SET: _Py_FALLTHROUGH; - case _Py_TYPE_VERSION_FROZEN_SET: - _PySet_MoveUnvisited(op, reachable, visited_space); - break; - case _Py_TYPE_VERSION_ASYNC_GENERATOR: - _PyAsyncGen_MoveUnvisited(op, reachable, visited_space); - break; - case _Py_TYPE_VERSION_ASYNC_ASEND: - _PyAsyncAsend_MoveUnvisited(op, reachable, visited_space); - break; - case _Py_TYPE_VERSION_COROUTINE: _Py_FALLTHROUGH; - case _Py_TYPE_VERSION_GENERATOR: - _PyGen_MoveUnvisited(op, reachable, visited_space); - break; - case _Py_TYPE_VERSION_TYPE: - _PyType_MoveUnvisited(op, reachable, visited_space); - break; - default: - { - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, visit_add_to_container, &arg); - } + else { + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, visit_add_to_container, stack); } } gc_list_validate_space(visited, visited_space); @@ -1511,8 +1473,9 @@ move_all_transitively_reachable(PyGC_Head *reachable, PyGC_Head *visited, int vi static intptr_t mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) { - PyGC_Head reachable; - gc_list_init(&reachable); + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; // Move all objects on stacks to reachable _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); @@ -1525,7 +1488,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b frame = frame->previous; continue; } - _PyFrame_MoveUnvisited(frame, &reachable, visited_space); + frame_move_unvisited(frame, &stack, visited_space); if (!start && frame->visited) { // If this frame has already been visited, then the lower frames // will have already been visited and will not have changed @@ -1538,31 +1501,32 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b ts = PyThreadState_Next(ts); HEAD_UNLOCK(runtime); } - Py_ssize_t objects_marked = move_all_transitively_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + Py_ssize_t 
objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); + assert(stack.top == NULL); return objects_marked; } static intptr_t mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) { - PyGC_Head reachable; - gc_list_init(&reachable); + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; Py_ssize_t objects_marked = 0; - _PyGC_MoveUnvisited(interp->sysdict, &reachable, visited_space); - _PyGC_MoveUnvisited(interp->builtins, &reachable, visited_space); - _PyGC_MoveUnvisited(interp->dict, &reachable, visited_space); + MOVE_UNVISITED(interp->sysdict, &stack, visited_space); + MOVE_UNVISITED(interp->builtins, &stack, visited_space); + MOVE_UNVISITED(interp->dict, &stack, visited_space); struct types_state *types = &interp->types; for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { - _PyGC_MoveUnvisited(types->builtins.initialized[i].tp_dict, &reachable, visited_space); - _PyGC_MoveUnvisited(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_subclasses, &stack, visited_space); } for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { - _PyGC_MoveUnvisited(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); - _PyGC_MoveUnvisited(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space); } - objects_marked += move_all_transitively_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + objects_marked += move_all_transitively_reachable(&stack, visited, visited_space); + assert(stack.top == NULL); return objects_marked; } @@ -1634,22 +1598,23 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_set_space(&gcstate->young.head, gcstate->visited_space); PyGC_Head increment; gc_list_init(&increment); - PyGC_Head working; - gc_list_init(&working); - gc_list_merge(&gcstate->young.head, &working); - gc_list_validate_space(&increment, gcstate->visited_space); + WorkStack working; + working.top = 0; + working.visited_space = gcstate->visited_space; + move_list_to_stack(&gcstate->young.head, &working); Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space); - assert(gc_list_is_empty(&working)); + gc_list_validate_space(&increment, gcstate->visited_space); + assert(working.top == NULL); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; } PyGC_Head *gc = _PyGCHead_NEXT(not_visited); - gc_list_move(gc, &working); - assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); + push_to_stack(gc, &working); + assert(!_Py_IsImmortal(FROM_GC(gc))); increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space); - assert(gc_list_is_empty(&working)); + assert(working.top == NULL); } GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); From 15455081f85f9cf119f439d09fae735636fe011e Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 3 Dec 2024 12:14:58 +0000 Subject: [PATCH 14/23] Tidy up --- Include/internal/pycore_gc.h | 2 -- 
Include/internal/pycore_typeobject.h | 10 ++-------- Objects/genobject.c | 4 ---- Objects/moduleobject.c | 1 - Objects/typeobject.c | 1 - Python/gc.c | 6 ++++-- 6 files changed, 6 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index fb7e3231737258..4ff34bf8ead7d0 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -382,8 +382,6 @@ extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp, gcvisitobjects_t callback, void *arg); #endif -PyAPI_FUNC(void) _PyGC_MoveUnvisited(PyObject *op, PyGC_Head *to, int visited_space); - #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 838339b27e64db..7b39d07f976ee3 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -25,14 +25,8 @@ extern "C" { #define _Py_TYPE_VERSION_BYTEARRAY 9 #define _Py_TYPE_VERSION_BYTES 10 #define _Py_TYPE_VERSION_COMPLEX 11 -#define _Py_TYPE_VERSION_GENERATOR 12 -#define _Py_TYPE_VERSION_COROUTINE 13 -#define _Py_TYPE_VERSION_MODULE 14 -#define _Py_TYPE_VERSION_TYPE 15 -#define _Py_TYPE_VERSION_ASYNC_GENERATOR 16 -#define _Py_TYPE_VERSION_ASYNC_ASEND 17 - -#define _Py_TYPE_VERSION_NEXT 20 + +#define _Py_TYPE_VERSION_NEXT 16 #define _Py_TYPE_BASE_VERSION_TAG (2<<16) diff --git a/Objects/genobject.c b/Objects/genobject.c index d8abb3d46f83cc..33679afecb420f 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -883,7 +883,6 @@ PyTypeObject PyGen_Type = { gen_memberlist, /* tp_members */ gen_getsetlist, /* tp_getset */ .tp_finalize = _PyGen_Finalize, - .tp_version_tag = _Py_TYPE_VERSION_GENERATOR, }; static PyObject * @@ -1226,7 +1225,6 @@ PyTypeObject PyCoro_Type = { coro_memberlist, /* tp_members */ coro_getsetlist, /* tp_getset */ .tp_finalize = _PyGen_Finalize, - .tp_version_tag = _Py_TYPE_VERSION_COROUTINE, }; static void @@ -1640,7 +1638,6 @@ PyTypeObject PyAsyncGen_Type = { async_gen_memberlist, /* tp_members */ async_gen_getsetlist, /* tp_getset */ .tp_finalize = _PyGen_Finalize, - .tp_version_tag = _Py_TYPE_VERSION_ASYNC_GENERATOR, }; @@ -1886,7 +1883,6 @@ PyTypeObject _PyAsyncGenASend_Type = { async_gen_asend_iternext, /* tp_iternext */ async_gen_asend_methods, /* tp_methods */ .tp_finalize = async_gen_asend_finalize, - .tp_version_tag = _Py_TYPE_VERSION_ASYNC_ASEND, }; diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index dee3d2d0395191..a8d64c9aefae6b 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -1356,5 +1356,4 @@ PyTypeObject PyModule_Type = { 0, /* tp_alloc */ new_module, /* tp_new */ PyObject_GC_Del, /* tp_free */ - .tp_version_tag = _Py_TYPE_VERSION_MODULE, }; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 7d3cb41bec065e..a887e02cb7c33c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6384,7 +6384,6 @@ PyTypeObject PyType_Type = { PyObject_GC_Del, /* tp_free */ type_is_gc, /* tp_is_gc */ .tp_vectorcall = type_vectorcall, - .tp_version_tag = _Py_TYPE_VERSION_TYPE, }; diff --git a/Python/gc.c b/Python/gc.c index 43703b1e4f37ce..f397c21bbe4456 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1436,9 +1436,11 @@ frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_s while (sp > locals) { sp--; _PyStackRef ref = *sp; - if (PyStackRef_IsNonNullMortal(ref)) { + if (!PyStackRef_IsNull(ref)) { PyObject *op = PyStackRef_AsPyObjectBorrow(ref); - move_unvisited(op, stack, visited_space); + if (!_Py_IsImmortal(op)) { + 
move_unvisited(op, stack, visited_space); + } } } } From a1a38c8da1e38fdb9392f37462f7f9d440d72ed3 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 3 Dec 2024 12:43:15 +0000 Subject: [PATCH 15/23] A bit more tidying up --- Include/internal/pycore_stackref.h | 2 -- Objects/typeobject.c | 2 -- Python/gc.c | 1 - 3 files changed, 5 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index d440835350c8ab..90a3118352f7ae 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -189,8 +189,6 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; #endif // Py_GIL_DISABLED -#define PyStackRef_IsNonNullMortal(stackref) (!PyStackRef_IsNull(stackref) && !_Py_IsImmortal(PyStackRef_AsPyObjectBorrow(stackref))) - // Check if a stackref is exactly the same as another stackref, including the // the deferred bit. This can only be used safely if you know that the deferred // bits of `a` and `b` match. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a887e02cb7c33c..f31f8181d9de50 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4156,7 +4156,6 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type) assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); type->tp_flags |= Py_TPFLAGS_MANAGED_DICT; type->tp_dictoffset = -1; - if (type->tp_basicsize == sizeof(PyObject *)) { type->tp_traverse = plain_object_traverse; } @@ -6246,7 +6245,6 @@ PyDoc_STRVAR(type_doc, "type(object) -> the object's type\n" "type(name, bases, dict, **kwds) -> a new type"); - static int type_traverse(PyObject *self, visitproc visit, void *arg) { diff --git a/Python/gc.c b/Python/gc.c index f397c21bbe4456..b221538571a110 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1340,7 +1340,6 @@ push_to_stack(PyGC_Head *gc, WorkStack *stack) { PyGC_Head *prev = GC_PREV(gc); PyGC_Head *next = GC_NEXT(gc); - _PyGCHead_SET_NEXT(prev, next); _PyGCHead_SET_PREV(next, prev); _PyGCHead_SET_PREV(gc, stack->top); From 68fc90bb74654f57a1fab692044daeb6f493adfd Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 3 Dec 2024 15:28:48 +0000 Subject: [PATCH 16/23] Move all work to do calculations to one place --- Python/gc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index b221538571a110..ee5d2abbdf566d 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1570,7 +1570,7 @@ assess_work_to_do(GCState *gcstate) heap_fraction = max_heap_fraction; } gcstate->young.count = 0; - return new_objects + heap_fraction; + return new_objects + heap_fraction * 2; } static void @@ -1617,6 +1617,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space); assert(working.top == NULL); } + assert(increment_size == gc_list_size(&increment)); GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); @@ -1625,7 +1626,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_collect_region(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; add_stats(gcstate, 1, stats); From 8893cf53bec4645245f11707bc0b11704cb40494 Mon Sep 17 00:00:00 2001 From: Mark 
Shannon Date: Tue, 3 Dec 2024 18:38:21 +0000 Subject: [PATCH 17/23] Assume that increments are 50% garbage for work done calculation --- Lib/test/test_gc.py | 14 +++----------- Objects/typeobject.c | 2 +- Python/gc.c | 46 ++++++++++++++++++++++---------------------- 3 files changed, 27 insertions(+), 35 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b5140057a69d36..baf8e95dffdfce 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1161,27 +1161,19 @@ def make_ll(depth): return head head = make_ll(1000) - count = 1000 - - # There will be some objects we aren't counting, - # e.g. the gc stats dicts. This test checks - # that the counts don't grow, so we try to - # correct for the uncounted objects - # This is just an estimate. - CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] initial_heap_size = _testinternalcapi.get_tracked_heap_size() - for i in range(20_000): + iterations = max(20_000, initial_heap_size) + for i in range(iterations): newhead = make_ll(20) - count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size - self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations") + self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() diff --git a/Objects/typeobject.c b/Objects/typeobject.c index f31f8181d9de50..7d2c157d2b43f9 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4156,7 +4156,7 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type) assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); type->tp_flags |= Py_TPFLAGS_MANAGED_DICT; type->tp_dictoffset = -1; - if (type->tp_basicsize == sizeof(PyObject *)) { + if (type->tp_basicsize == sizeof(PyObject)) { type->tp_traverse = plain_object_traverse; } } diff --git a/Python/gc.c b/Python/gc.c index ee5d2abbdf566d..e09921f06a5157 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1277,17 +1277,12 @@ gc_list_set_space(PyGC_Head *list, int space) * space faster than objects are added to the old space. * * Each young or incremental collection adds a number of - * objects, S (for survivors) to the old space, and - * incremental collectors scan I objects from the old space. - * I > S must be true. We also want I > S * N to be where - * N > 1. Higher values of N mean that the old space is + * new objects (N) to the heap, and incremental collectors + * scan I objects from the old space. + * I > N must be true. We also want I > N * K to be where + * K > 2. Higher values of K mean that the old space is * scanned more rapidly. 
- * The default incremental threshold of 10 translates to - * N == 1.4 (1 + 4/threshold) */ - -/* Divide by 10, so that the default incremental threshold of 10 - * scans objects at 1% of the heap size */ #define SCAN_RATE_DIVISOR 10 static void @@ -1335,6 +1330,7 @@ typedef struct work_stack { int visited_space; } WorkStack; +/* Remove gc from the list it is currently in and push it to the stack */ static inline void push_to_stack(PyGC_Head *gc, WorkStack *stack) { @@ -1346,6 +1342,14 @@ push_to_stack(PyGC_Head *gc, WorkStack *stack) stack->top = gc; } +static inline PyGC_Head * +pop_from_stack(WorkStack *stack) +{ + PyGC_Head *gc = stack->top; + stack->top = _PyGCHead_PREV(gc); + return gc; +} + /* append list `from` to `stack`; `from` becomes an empty list */ static void move_list_to_stack(PyGC_Head *from, WorkStack *stack) @@ -1418,7 +1422,6 @@ completed_scavenge(GCState *gcstate) gc_list_set_space(&gcstate->old[not_visited].head, not_visited); } assert(gc_list_is_empty(&gcstate->old[visited].head)); - gcstate->work_to_do = 0; gcstate->phase = GC_PHASE_MARK; } @@ -1450,9 +1453,7 @@ move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visite // Transitively traverse all objects from reachable, until empty Py_ssize_t objects_marked = 0; while (stack->top != NULL) { - /* Pop from the stack, to do depth first traversal */ - PyGC_Head *gc = stack->top; - stack->top = _PyGCHead_PREV(gc); + PyGC_Head *gc = pop_from_stack(stack); assert(gc_old_space(gc) == visited_space); gc_list_append(gc, visited); objects_marked++; @@ -1513,7 +1514,6 @@ mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_sp WorkStack stack; stack.top = NULL; stack.visited_space = visited_space; - Py_ssize_t objects_marked = 0; MOVE_UNVISITED(interp->sysdict, &stack, visited_space); MOVE_UNVISITED(interp->builtins, &stack, visited_space); MOVE_UNVISITED(interp->dict, &stack, visited_space); @@ -1526,7 +1526,7 @@ mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_sp MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space); MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space); } - objects_marked += move_all_transitively_reachable(&stack, visited, visited_space); + Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); assert(stack.top == NULL); return objects_marked; } @@ -1547,12 +1547,11 @@ mark_at_start(PyThreadState *tstate) static intptr_t assess_work_to_do(GCState *gcstate) { - /* The amount of work we want to do depends on three things. + /* The amount of work we want to do depends on two things. * 1. The number of new objects created - * 2. The growth in heap size since the last collection - * 3. The heap size (up to the number of new objects, to avoid quadratic effects) + * 2. The heap size (up to twice the number of new objects, to avoid quadratic effects) * - * For a steady state heap, the amount of work to do is three times the number + * For a large, steady state heap, the amount of work to do is three times the number * of new objects added to the heap. This ensures that we stay ahead in the * worst case of all new objects being garbage. 
* @@ -1564,13 +1563,13 @@ assess_work_to_do(GCState *gcstate) scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects*3/2; + intptr_t max_heap_fraction = new_objects*2; intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; if (heap_fraction > max_heap_fraction) { heap_fraction = max_heap_fraction; } gcstate->young.count = 0; - return new_objects + heap_fraction * 2; + return new_objects + heap_fraction; } static void @@ -1606,7 +1605,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space); gc_list_validate_space(&increment, gcstate->visited_space); assert(working.top == NULL); - while (increment_size < gcstate->work_to_do) { + while (increment_size < gcstate->work_to_do * 2) { if (gc_list_is_empty(not_visited)) { break; } @@ -1626,7 +1625,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_collect_region(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do -= increment_size; + gcstate->work_to_do -= increment_size/2; add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { @@ -1661,6 +1660,7 @@ gc_collect_full(PyThreadState *tstate, gcstate->old[0].count = 0; gcstate->old[1].count = 0; completed_scavenge(gcstate); + gcstate->work_to_do = -gcstate->young.threshold; _PyGC_ClearAllFreeLists(tstate->interp); validate_spaces(gcstate); add_stats(gcstate, 2, stats); From ba20c7c990ff0dbb6af8ec27b5bb8780015f3edc Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 4 Dec 2024 09:36:01 +0000 Subject: [PATCH 18/23] Elaborate comment --- Python/gc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index e09921f06a5157..833539a13fdcb0 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1550,13 +1550,16 @@ assess_work_to_do(GCState *gcstate) /* The amount of work we want to do depends on two things. * 1. The number of new objects created * 2. The heap size (up to twice the number of new objects, to avoid quadratic effects) + * 3. The amount of garbage. * - * For a large, steady state heap, the amount of work to do is three times the number - * of new objects added to the heap. This ensures that we stay ahead in the - * worst case of all new objects being garbage. + * We cannot know how much of the heap is garbage, but we know that no reachable object + * is garbage. We make a (fairly pessismistic) assumption that half the heap not + * reachable from the roots is garbage, and count collections of increments as half as efficient + * as processing the heap as the marking phase. * - * This could be improved by tracking survival rates, but it is still a - * large improvement on the non-marking approach. + * For a large, steady state heap, the amount of work to do is at least three times the + * number of new objects added to the heap. This ensures that we stay ahead in the + * worst case of all new objects being garbage. 
*/ intptr_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 2) { From 8262bf010f85d3a4338d644513c1613ec8d1771f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 4 Dec 2024 11:47:04 +0000 Subject: [PATCH 19/23] More tweaking of thresholds --- Python/gc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 833539a13fdcb0..338d81eb2d631b 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1283,7 +1283,7 @@ gc_list_set_space(PyGC_Head *list, int space) * K > 2. Higher values of K mean that the old space is * scanned more rapidly. */ -#define SCAN_RATE_DIVISOR 10 +#define SCAN_RATE_DIVISOR 5 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1566,7 +1566,7 @@ assess_work_to_do(GCState *gcstate) scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects*2; + intptr_t max_heap_fraction = new_objects*5; intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; if (heap_fraction > max_heap_fraction) { heap_fraction = max_heap_fraction; @@ -1585,7 +1585,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked*2; validate_spaces(gcstate); return; } @@ -1597,7 +1597,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) } intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked*2; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); PyGC_Head increment; gc_list_init(&increment); @@ -1608,7 +1608,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space); gc_list_validate_space(&increment, gcstate->visited_space); assert(working.top == NULL); - while (increment_size < gcstate->work_to_do * 2) { + while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; } @@ -1628,7 +1628,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_collect_region(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do -= increment_size/2; + gcstate->work_to_do -= increment_size; add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { From 3c2116edacadf0a5d81110529edb92459f3733f8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 4 Dec 2024 16:54:54 +0000 Subject: [PATCH 20/23] Do some algebra --- InternalDocs/garbage_collector.md | 49 +++++++++++++++++++++++++++---- Python/gc.c | 12 ++++---- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 08db080a200ea4..07b3c655000aee 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -199,22 +199,22 @@ unreachable: ```pycon >>> import gc ->>> +>>> >>> class Link: ... def __init__(self, next_link=None): ... self.next_link = next_link -... +... 
>>> link_3 = Link() >>> link_2 = Link(link_3) >>> link_1 = Link(link_2) >>> link_3.next_link = link_1 >>> A = link_1 >>> del link_1, link_2, link_3 ->>> +>>> >>> link_4 = Link() >>> link_4.next_link = link_4 >>> del link_4 ->>> +>>> >>> # Collect the unreachable Link object (and its .__dict__ dict). >>> gc.collect() 2 @@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. >>> # Create a reference cycle. >>> x = MyObj() >>> x.self = x ->>> +>>> >>> # Initially the object is in the young generation. >>> gc.get_objects(generation=0) [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] ->>> +>>> >>> # After a collection of the youngest generation the object >>> # moves to the old generation. >>> gc.collect(generation=0) @@ -515,6 +515,43 @@ increment. All objects directly referred to from those stack frames are added to the working set. Then the above algorithm is repeated, starting from step 2. +Determining how much work to do +------------------------------- + +We need to do a certain amount of work to enusre that garbage is collected, +but doing too much work slows down execution. + +To work out how much work we need to do, consider a heap with `L` live objects +and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects +at the end of the scavenge. We don't want amount of garbage to grow, `G1 ≤ G0`, and +we don't want too much garbage (say 1/3 of the heap maximum), `G0 ≤ L/2`. +For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which +`G1` garbage objects are created. + +The number of new objects created `N` must be at least the new garbage created, `N ≥ G`. +If we set `T == 4*N` we get `T > 4*G1` and `T = L + G0 + G1` => `L + G0 > 3G1` +For a steady state heap `G0 == G1` we get `L > 2G` and the desired garbage ratio. + +In other words, to keep the garbage fraction to 1/3 or less we need to visit +4 times as many objects as are newly created. + +We can do better than this though. Not all new objects will be garbage. +Consider the heap at the end of the scavenge with `L1` live objects and `G1` +garbage. Also, note that `T == M + I` where `M` is the number of objects marked +as reachable and `I` is the number of objects visited in increments. +Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`. + +If we choose the amount of work done such that `3*M + I == 8N` then we can do +do less work in most cases, but are still guaranteed to keep up. +Since `I ≥ G0 + G1` (not strictly true, but close enough) +`T == M + I == (8N + 2I)/3` and `(8N + 2I)/3 ≥ 4G`, so we can keep up. + +The reason that this improves performance is that `M` is usually much larger +than `I` Suppose `M == 10I`, then `T < 3N`. + +Finally, instead of using a fixed multiple of 8, we gradually increase it as the +heap grows. This avoids wasting work for small heaps and during startup. 
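To spell out the "desired garbage ratio" step above: combining `T == 4*N` with `N ≥ G1` and the steady-state assumption gives the one-third bound directly. The derivation below is illustrative only, using nothing beyond the definitions of `L`, `G0`, `G1`, `N` and `T` given in the new section:

```latex
\begin{align*}
T &= L + G_0 + G_1 = 4N &&\text{(work per full scavenge)}\\
N &\ge G_1              &&\text{(new objects} \ge \text{new garbage)}\\
\Rightarrow\quad L + G_0 + G_1 &\ge 4G_1 \;\Rightarrow\; L + G_0 \ge 3G_1\\
G_0 = G_1 \;\text{(steady state)} &\Rightarrow\; L \ge 2G_0
  \;\Rightarrow\; \frac{G_0}{L + G_0} \le \frac{1}{3}
\end{align*}
```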
+ Optimization: reusing fields to save memory =========================================== diff --git a/Python/gc.c b/Python/gc.c index 338d81eb2d631b..25be5008bba180 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1566,7 +1566,7 @@ assess_work_to_do(GCState *gcstate) scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects*5; + intptr_t max_heap_fraction = new_objects*7; intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; if (heap_fraction > max_heap_fraction) { heap_fraction = max_heap_fraction; @@ -1575,6 +1575,8 @@ assess_work_to_do(GCState *gcstate) return new_objects + heap_fraction; } +#define MARKING_PROGRESS_MULTIPLIER 3 + static void gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) { @@ -1585,19 +1587,15 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked*2; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; validate_spaces(gcstate); return; } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - int scale_factor = gcstate->old[0].threshold; - if (scale_factor < 2) { - scale_factor = 2; - } intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked*2; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); PyGC_Head increment; gc_list_init(&increment); From 72d0284492ff0adc8e15e01e8a819853f1d1a85a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 4 Dec 2024 17:49:48 +0000 Subject: [PATCH 21/23] Revert to 2M+I from 3M+I --- InternalDocs/garbage_collector.md | 6 +++--- Python/gc.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 07b3c655000aee..7e4ce873c9742b 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -541,13 +541,13 @@ garbage. Also, note that `T == M + I` where `M` is the number of objects marked as reachable and `I` is the number of objects visited in increments. Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`. -If we choose the amount of work done such that `3*M + I == 8N` then we can do +If we choose the amount of work done such that `2*M + I == 6N` then we can do do less work in most cases, but are still guaranteed to keep up. Since `I ≥ G0 + G1` (not strictly true, but close enough) -`T == M + I == (8N + 2I)/3` and `(8N + 2I)/3 ≥ 4G`, so we can keep up. +`T == M + I == (6N + I)/2` and `(6N + I)/2 ≥ 4G`, so we can keep up. The reason that this improves performance is that `M` is usually much larger -than `I` Suppose `M == 10I`, then `T < 3N`. +than `I` Suppose `M == 10I`, then `T ≅ 3N`. Finally, instead of using a fixed multiple of 8, we gradually increase it as the heap grows. This avoids wasting work for small heaps and during startup. 
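As a quick arithmetic check of the `2*M + I == 6N` budget introduced above (illustrative only, reusing the document's own symbols): when marking covers ten times as many objects as the increments do, the total visited per cycle comes out at roughly `3N`, and the budget still keeps up with garbage in the steady state:

```latex
\begin{align*}
2M + I = 6N,\; M = 10I \;&\Rightarrow\; 21I = 6N
  \;\Rightarrow\; I = \tfrac{2}{7}N,\; M = \tfrac{20}{7}N\\
T = M + I &= \tfrac{22}{7}N \approx 3.1N
  \quad\text{(versus $4N$ with no marking)}\\
T = \tfrac{6N + I}{2} &\ge \tfrac{6G_1 + (G_0 + G_1)}{2} = 4G_1
  \quad\text{when } N \ge G_1,\; I \ge G_0 + G_1,\; G_0 = G_1
\end{align*}
```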
diff --git a/Python/gc.c b/Python/gc.c index 25be5008bba180..8c671b4fb83f44 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1566,7 +1566,7 @@ assess_work_to_do(GCState *gcstate) scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects*7; + intptr_t max_heap_fraction = new_objects*5; intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; if (heap_fraction > max_heap_fraction) { heap_fraction = max_heap_fraction; @@ -1575,7 +1575,8 @@ assess_work_to_do(GCState *gcstate) return new_objects + heap_fraction; } -#define MARKING_PROGRESS_MULTIPLIER 3 +/* See Internal GC docs for explanation */ +#define MARKING_PROGRESS_MULTIPLIER 2 static void gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) From 0f182e2467a97b52335ca9274ade8b1ce0982445 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 5 Dec 2024 11:44:44 +0000 Subject: [PATCH 22/23] Address review comments --- InternalDocs/garbage_collector.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 7e4ce873c9742b..69e4780c3efe66 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -528,7 +528,8 @@ we don't want too much garbage (say 1/3 of the heap maximum), `G0 ≤ L/2`. For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which `G1` garbage objects are created. -The number of new objects created `N` must be at least the new garbage created, `N ≥ G`. +The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`, +assuming that the number of live objects remains roughly constant. If we set `T == 4*N` we get `T > 4*G1` and `T = L + G0 + G1` => `L + G0 > 3G1` For a steady state heap `G0 == G1` we get `L > 2G` and the desired garbage ratio. @@ -542,7 +543,7 @@ as reachable and `I` is the number of objects visited in increments. Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`. If we choose the amount of work done such that `2*M + I == 6N` then we can do -do less work in most cases, but are still guaranteed to keep up. +less work in most cases, but are still guaranteed to keep up. Since `I ≥ G0 + G1` (not strictly true, but close enough) `T == M + I == (6N + I)/2` and `(6N + I)/2 ≥ 4G`, so we can keep up. From d3c21bb57092428592b86af28246ed1221b002f5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 5 Dec 2024 18:32:31 +0000 Subject: [PATCH 23/23] Address review comments and clarify code a bit --- InternalDocs/garbage_collector.md | 10 +++++----- Python/gc.c | 30 +++++++++++------------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 69e4780c3efe66..4761f78f3593e3 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -523,7 +523,7 @@ but doing too much work slows down execution. To work out how much work we need to do, consider a heap with `L` live objects and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects -at the end of the scavenge. We don't want amount of garbage to grow, `G1 ≤ G0`, and +at the end of the scavenge. We don't want the amount of garbage to grow, `G1 ≤ G0`, and we don't want too much garbage (say 1/3 of the heap maximum), `G0 ≤ L/2`. For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which `G1` garbage objects are created. 
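The `2*M + I` accounting described in these documentation changes is what `MARKING_PROGRESS_MULTIPLIER` (set to 2 above) expresses on the code side: objects found by the marking phase retire two units of the work budget, objects scanned in increments one unit. Below is a small self-contained model of that bookkeeping; it is a sketch only, the concrete figures are invented, and the real logic lives in `assess_work_to_do()` and `gc_collect_increment()`.

```c
#include <stdio.h>

int main(void)
{
    /* Assumed figures for one collection cycle; none of these come from CPython. */
    long new_objects = 1000;             /* N: objects allocated since the last increment  */
    long budget = 6 * new_objects;       /* target work, ~6N once the heap term is capped  */

    long marked = 2500;                  /* M: objects the marking phase found reachable   */
    budget -= 2 * marked;                /* marking charged at MARKING_PROGRESS_MULTIPLIER */

    long scanned = budget > 0 ? budget : 0;  /* I: leftover budget spent on increments     */

    printf("M = %ld, I = %ld, visited T = M + I = %ld (a non-marking pass needs 4N = %ld)\n",
           marked, scanned, marked + scanned, 4 * new_objects);
    return 0;
}
```

For these made-up numbers the marked-plus-scanned total stays below the `4N` a purely incremental pass would need, which is the saving the docs attribute to `M` usually being much larger than `I`.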
@@ -531,7 +531,7 @@ For each full scavenge we must visit all objects, `T == L + G0 + G1`, during whi The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`, assuming that the number of live objects remains roughly constant. If we set `T == 4*N` we get `T > 4*G1` and `T = L + G0 + G1` => `L + G0 > 3G1` -For a steady state heap `G0 == G1` we get `L > 2G` and the desired garbage ratio. +For a steady state heap (`G0 == G1`) we get `L > 2G0` and the desired garbage ratio. In other words, to keep the garbage fraction to 1/3 or less we need to visit 4 times as many objects as are newly created. @@ -544,11 +544,11 @@ Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + If we choose the amount of work done such that `2*M + I == 6N` then we can do less work in most cases, but are still guaranteed to keep up. -Since `I ≥ G0 + G1` (not strictly true, but close enough) -`T == M + I == (6N + I)/2` and `(6N + I)/2 ≥ 4G`, so we can keep up. +Since `I ≳ G0 + G1` (not strictly true, but close enough) +`T == M + I == (6N + I)/2` and `(6N + I)/2 ≳ 4G`, so we can keep up. The reason that this improves performance is that `M` is usually much larger -than `I` Suppose `M == 10I`, then `T ≅ 3N`. +than `I`. If `M == 10I`, then `T ≅ 3N`. Finally, instead of using a fixed multiple of 8, we gradually increase it as the heap grows. This avoids wasting work for small heaps and during startup. diff --git a/Python/gc.c b/Python/gc.c index 8c671b4fb83f44..fd29a48518e71b 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1544,40 +1544,32 @@ mark_at_start(PyThreadState *tstate) return objects_marked; } + +/* See InternalDocs/garbage_collector.md for more details. */ +#define MAX_HEAP_PORTION_MULTIPLIER 5 +#define MARKING_PROGRESS_MULTIPLIER 2 + static intptr_t assess_work_to_do(GCState *gcstate) { /* The amount of work we want to do depends on two things. * 1. The number of new objects created - * 2. The heap size (up to twice the number of new objects, to avoid quadratic effects) - * 3. The amount of garbage. - * - * We cannot know how much of the heap is garbage, but we know that no reachable object - * is garbage. We make a (fairly pessismistic) assumption that half the heap not - * reachable from the roots is garbage, and count collections of increments as half as efficient - * as processing the heap as the marking phase. - * - * For a large, steady state heap, the amount of work to do is at least three times the - * number of new objects added to the heap. This ensures that we stay ahead in the - * worst case of all new objects being garbage. + * 2. 
The heap size (up to a multiple of the number of new objects, to avoid quadratic effects) */ intptr_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 2) { scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects*5; - intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; - if (heap_fraction > max_heap_fraction) { - heap_fraction = max_heap_fraction; + intptr_t max_heap_portion = new_objects * MAX_HEAP_PORTION_MULTIPLIER; + intptr_t heap_portion = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_portion > max_heap_portion) { + heap_portion = max_heap_portion; } gcstate->young.count = 0; - return new_objects + heap_fraction; + return new_objects + heap_portion; } -/* See Internal GC docs for explanation */ -#define MARKING_PROGRESS_MULTIPLIER 2 - static void gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) {