From d0672a4351513ad49f9a24a5267105d57d5e5ea2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 21 Mar 2024 12:10:53 +0000 Subject: [PATCH 1/4] Change the size of the increment to about 1% of the total heap size. --- Include/internal/pycore_gc.h | 1 + Lib/test/test_gc.py | 34 +++++++++++++------ ...-03-21-12-10-11.gh-issue-117108._6jIrB.rst | 3 ++ Python/gc.c | 27 +++++++-------- 4 files changed, 40 insertions(+), 25 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 9d66e62ba8b5e3..bea5d4327384f1 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -282,6 +282,7 @@ struct _gc_runtime_state { /* a list of callbacks to be invoked when collection is performed */ PyObject *callbacks; + Py_ssize_t heap_size; Py_ssize_t work_to_do; /* Which of the old spaces is the visited space */ int visited_space; diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index ce01916bcabe4f..92c1e9a4047438 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1058,7 +1058,18 @@ class Z: callback.assert_not_called() gc.enable() + +class IncremetalGCTests(unittest.TestCase): + + def setUp(self): + gc.enable() + + def tearDown(self): + gc.disable() + @unittest.skipIf(Py_GIL_DISABLED, "Free threading does not support incremental GC") + # Use small increments to emulate longer running process in a shorter time + @gc_threshold(200, 10) def test_incremental_gc_handles_fast_cycle_creation(self): class LinkedList: @@ -1080,28 +1091,31 @@ def make_ll(depth): head = LinkedList(head, head.prev) return head - head = make_ll(10000) - count = 10000 + head = make_ll(1000) + count = 1000 - # We expect the counts to go negative eventually - # as there will some objects we aren't counting, - # e.g. the gc stats dicts. The test merely checks - # that the counts don't grow. + # There will some objects we aren't counting, + # e.g. the gc stats dicts. This test checks + # that the counts don't grow, so we try to + # correct for the uncounted objects + # This is just an estimate. + CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] - for i in range(1000): - newhead = make_ll(200) - count += 200 + for i in range(20_000): + newhead = make_ll(20) + count += 20 newhead.surprise = head olds.append(newhead) - if len(olds) == 50: + if len(olds) == 20: stats = gc.get_stats() young = stats[0] incremental = stats[1] old = stats[2] collected = young['collected'] + incremental['collected'] + old['collected'] + count += CORRECTION live = count - collected self.assertLess(live, 25000) del olds[:] diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst b/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst new file mode 100644 index 00000000000000..57ad9606b05e05 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst @@ -0,0 +1,3 @@ +The cycle GC now chooses the size of increments based on the total heap +size, instead of the rate of object creation. This ensures that it can keep +up with growing heaps. diff --git a/Python/gc.c b/Python/gc.c index d0f4ce38bbe567..47179ec03af904 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -182,6 +182,7 @@ _PyGC_Init(PyInterpreterState *interp) if (gcstate->callbacks == NULL) { return _PyStatus_NO_MEMORY(); } + gcstate->heap_size = 0; return _PyStatus_OK(); } @@ -1258,9 +1259,9 @@ gc_list_set_space(PyGC_Head *list, uintptr_t space) * N == 1.4 (1 + 4/threshold) */ -/* Multiply by 4 so that the default incremental threshold of 10 - * scans objects at 20% the rate of object creation */ -#define SCAN_RATE_MULTIPLIER 2 +/* Divide by 10, so that the default incremental threshold of 10 + * scans objects at 1% of the heap size */ +#define SCAN_RATE_DIVISOR 10 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1313,7 +1314,7 @@ gc_collect_young(PyThreadState *tstate, if (scale_factor < 1) { scale_factor = 1; } - gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor; + gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; add_stats(gcstate, 0, stats); } @@ -1384,12 +1385,9 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat static void completed_cycle(GCState *gcstate) { - PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; - assert(gc_list_is_empty(not_visited)); + assert(gc_list_is_empty(&gcstate->old[gcstate->visited_space^1].head)); gcstate->visited_space = flip_old_space(gcstate->visited_space); - if (gcstate->work_to_do > 0) { - gcstate->work_to_do = 0; - } + gcstate->work_to_do = 0; } static void @@ -1404,13 +1402,13 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (scale_factor < 1) { scale_factor = 1; } - Py_ssize_t increment_size = 0; gc_list_merge(&gcstate->young.head, &increment); gcstate->young.count = 0; if (gcstate->visited_space) { /* objects in visited space have bit set, so we set it here */ gc_list_set_space(&increment, 1); } + Py_ssize_t increment_size = 0; while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; @@ -1425,14 +1423,11 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) PyGC_Head survivors; gc_list_init(&survivors); gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats); - Py_ssize_t survivor_count = gc_list_size(&survivors); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor; + gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; - if (gcstate->work_to_do < 0) { - gcstate->work_to_do = 0; - } + validate_old(gcstate); add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { @@ -1974,6 +1969,7 @@ _PyObject_GC_Link(PyObject *op) gc->_gc_next = 0; gc->_gc_prev = 0; gcstate->young.count++; /* number of allocated GC objects */ + gcstate->heap_size++; if (gcstate->young.count > gcstate->young.threshold && gcstate->enabled && gcstate->young.threshold && @@ -2095,6 +2091,7 @@ PyObject_GC_Del(void *op) if (gcstate->young.count > 0) { gcstate->young.count--; } + gcstate->heap_size--; PyObject_Free(((char *)op)-presize); } From 11987ce691b50c2942f164816c1f7a79219d64ce Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Mar 2024 08:44:06 +0000 Subject: [PATCH 2/4] Fix some compiler warnings --- Include/internal/pycore_gc.h | 2 +- Modules/gcmodule.c | 2 +- Python/gc.c | 4 ++-- Python/gc_free_threading.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index bea5d4327384f1..e729616936f03b 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -322,7 +322,7 @@ extern void _PyGC_Unfreeze(PyInterpreterState *interp); /* Number of frozen objects */ extern Py_ssize_t _PyGC_GetFreezeCount(PyInterpreterState *interp); -extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation); +extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, int generation); extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs); // Functions to clear types free lists diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 3320e54dd9fe93..8a1b483eddae35 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -326,7 +326,7 @@ gc_get_objects_impl(PyObject *module, Py_ssize_t generation) } PyInterpreterState *interp = _PyInterpreterState_GET(); - return _PyGC_GetObjects(interp, generation); + return _PyGC_GetObjects(interp, (int)generation); } /*[clinic input] diff --git a/Python/gc.c b/Python/gc.c index 47179ec03af904..b9666057cec4ae 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1233,7 +1233,7 @@ gc_collect_region(PyThreadState *tstate, struct gc_collection_stats *stats); static inline Py_ssize_t -gc_list_set_space(PyGC_Head *list, uintptr_t space) +gc_list_set_space(PyGC_Head *list, int space) { Py_ssize_t size = 0; PyGC_Head *gc; @@ -1673,7 +1673,7 @@ _PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs) } PyObject * -_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation) +_PyGC_GetObjects(PyInterpreterState *interp, int generation) { assert(generation >= -1 && generation < NUM_GENERATIONS); GCState *gcstate = &interp->gc; diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 52c79c02099b53..69ce22a1e83b62 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1305,7 +1305,7 @@ visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area, } PyObject * -_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation) +_PyGC_GetObjects(PyInterpreterState *interp, int generation) { PyObject *result = PyList_New(0); if (!result) { From ed856245b2f74b91a105880aa4dc236c972b80d5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Mar 2024 17:29:03 +0000 Subject: [PATCH 3/4] Address review comments --- Lib/test/test_gc.py | 2 +- Python/gc.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 92c1e9a4047438..051216d82cd513 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1094,7 +1094,7 @@ def make_ll(depth): head = make_ll(1000) count = 1000 - # There will some objects we aren't counting, + # There will be some objects we aren't counting, # e.g. the gc stats dicts. This test checks # that the counts don't grow, so we try to # correct for the uncounted objects diff --git a/Python/gc.c b/Python/gc.c index b9666057cec4ae..2517b86a41fa53 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1385,7 +1385,10 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat static void completed_cycle(GCState *gcstate) { - assert(gc_list_is_empty(&gcstate->old[gcstate->visited_space^1].head)); +#ifdef Py_DEBUG + PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; + assert(gc_list_is_empty(not_visited)); +#endif gcstate->visited_space = flip_old_space(gcstate->visited_space); gcstate->work_to_do = 0; } From 18510eec3962994076a686df48bc68c5b4622aff Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Mar 2024 17:42:23 +0000 Subject: [PATCH 4/4] Address review comment --- Lib/test/test_gc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 051216d82cd513..57acbac5859e7f 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1059,9 +1059,10 @@ class Z: gc.enable() -class IncremetalGCTests(unittest.TestCase): +class IncrementalGCTests(unittest.TestCase): def setUp(self): + # Reenable GC as it is disabled module-wide gc.enable() def tearDown(self):