From d34a745c969258ba225e8cd7f83dbecef303f395 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Fri, 11 Apr 2025 01:16:37 +1200 Subject: [PATCH 1/7] gh-129210: Skip second reachability check if no finalizers run To handle objects potentially resurrected in finalizers it is necessary to repeat the reachability check on objects to be collected. If no finalizers are run, this can be skipped: in that case it is not possible for an object to be resurrected. In my environment and testing this gives a ~25-30% performance improvement for the gh-129210 micro-benchmark. --- Python/gc.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index dad088e09f872f..313ed0190a4cab 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1074,11 +1074,12 @@ handle_legacy_finalizers(PyThreadState *tstate, * Note that this may remove some (or even all) of the objects from the * list, due to refcounts falling to 0. */ -static void +static int finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) { destructor finalize; PyGC_Head seen; + int finalizer_run = 0; /* While we're going through the loop, `finalize(op)` may cause op, or * other objects, to be reclaimed via refcounts falling to zero. So @@ -1097,6 +1098,7 @@ finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) if (!_PyGC_FINALIZED(op) && (finalize = Py_TYPE(op)->tp_finalize) != NULL) { + finalizer_run = 1; _PyGC_SET_FINALIZED(op); Py_INCREF(op); finalize(op); @@ -1105,6 +1107,7 @@ finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) } } gc_list_merge(&seen, collectable); + return finalizer_run; } /* Break reference cycles by clearing the containers involved. This is @@ -1736,13 +1739,20 @@ gc_collect_region(PyThreadState *tstate, validate_list(&unreachable, collecting_set_unreachable_clear); /* Call tp_finalize on objects which have one.
*/ - finalize_garbage(tstate, &unreachable); - /* Handle any objects that may have resurrected after the call - * to 'finalize_garbage' and continue the collection with the - * objects that are still unreachable */ + int check_resurrected = finalize_garbage(tstate, &unreachable); + + /* If no finalizers have run, no objects can have been resurrected: in that + * case skip the resurrection check. Otherwise we need to check and handle + * any objects that may have resurrected after the call to + * 'finalize_garbage' and continue the collection with the objects that are + * still unreachable */ PyGC_Head final_unreachable; gc_list_init(&final_unreachable); - handle_resurrected_objects(&unreachable, &final_unreachable, to); + if (check_resurrected) { + handle_resurrected_objects(&unreachable, &final_unreachable, to); + } else { + gc_list_merge(&unreachable, &final_unreachable); + } /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects From 0464fb7c7cfe562af080f3081beba8eac6095c79 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Sun, 13 Apr 2025 01:22:47 +1200 Subject: [PATCH 2/7] gh-129210: Avoid unnecessary list init & merge Avoid an unnecessary list initialisation and merge: this is a very marginal optimisation which yields a speed increase of a couple of percent on a few benchmarks. The marginal performance improvement may not be worth the marginal additional code complexity. --- Python/gc.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 313ed0190a4cab..daf1e3006859fc 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1742,24 +1742,26 @@ gc_collect_region(PyThreadState *tstate, int check_resurrected = finalize_garbage(tstate, &unreachable); /* If no finalizers have run, no objects can have been resurrected: in that - * case skip the resurrection check.
Otherwise we need to check and handle - * any objects that may have resurrected after the call to - * 'finalize_garbage' and continue the collection with the objects that are - * still unreachable */ + * case skip the resurrection check and just use the existing unreachable + * list. Otherwise we need to check and handle any objects that may have + * resurrected after the call to 'finalize_garbage' and continue the + * collection with the objects that are still unreachable. */ PyGC_Head final_unreachable; - gc_list_init(&final_unreachable); + PyGC_Head *to_delete; if (check_resurrected) { + gc_list_init(&final_unreachable); handle_resurrected_objects(&unreachable, &final_unreachable, to); + to_delete = &final_unreachable; } else { - gc_list_merge(&unreachable, &final_unreachable); + to_delete = &unreachable; } /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. */ - stats->collected += gc_list_size(&final_unreachable); - delete_garbage(tstate, gcstate, &final_unreachable, to); + stats->collected += gc_list_size(to_delete); + delete_garbage(tstate, gcstate, to_delete, to); /* Collect statistics on uncollectable objects found and print * debugging information. 
*/ From 6ae3f845aff46641c8bce702ec52757abe9a4f9b Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 16 Apr 2025 12:02:46 +1200 Subject: [PATCH 3/7] Add blurb --- .../2025-04-16-12-02-26.gh-issue-129210.a5uLpE.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-02-26.gh-issue-129210.a5uLpE.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-02-26.gh-issue-129210.a5uLpE.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-02-26.gh-issue-129210.a5uLpE.rst new file mode 100644 index 00000000000000..e76de3433fd90c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-02-26.gh-issue-129210.a5uLpE.rst @@ -0,0 +1,2 @@ +Optimise garbage collection in case where objects to collect have +finalizers, and hence cannot be resurrected. From c718a8eb3c2fa556353578514ca5c234fa5b2b2f Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Wed, 16 Apr 2025 14:49:34 +1200 Subject: [PATCH 4/7] gh-129210: Skip second reachability check in ft GC if no finalizers run As for the regular GIL garbage collector, skip the second reachability check if no objects to-be-collected have finalizers, and hence none can be resurrected. In (so far very limited) testing this gives a ~10% overall performance improvement to the motivating benchmark's runtime and a ~35% improvement to its reported time. The pyperformance gc_traversal and gc_collect benchmarks show no significant difference and a 6% improvement respectively. --- Python/gc_free_threading.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index fa4cb56f01e800..1eecf61682eee7 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1593,11 +1593,12 @@ debug_cycle(const char *msg, PyObject *op) * Note that this may remove some (or even all) of the objects from the * list, due to refcounts falling to 0. 
*/ -static void +static int finalize_garbage(struct collection_state *state) { // NOTE: the unreachable worklist holds a strong reference to the object // to prevent it from being deallocated while we are holding on to it. + int finalizer_run = 0; PyObject *op; WORKSTACK_FOR_EACH(&state->unreachable, op) { if (!_PyGC_FINALIZED(op)) { @@ -1605,10 +1606,12 @@ finalize_garbage(struct collection_state *state) if (finalize != NULL) { _PyGC_SET_FINALIZED(op); finalize(op); + finalizer_run = 1; assert(!_PyErr_Occurred(_PyThreadState_GET())); } } } + return finalizer_run; } // Break reference cycles by clearing the containers involved. @@ -2009,11 +2012,13 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // Call weakref callbacks and finalizers after unpausing other threads to // avoid potential deadlocks. call_weakref_callbacks(state); - finalize_garbage(state); + int check_resurrected = finalize_garbage(state); - // Handle any objects that may have resurrected after the finalization. 
_PyEval_StopTheWorld(interp); - err = handle_resurrected_objects(state); + // Handle any objects that may have resurrected after finalization, if any + if (check_resurrected) { + err = handle_resurrected_objects(state); + } // Clear free lists in all threads _PyGC_ClearAllFreeLists(interp); _PyEval_StartTheWorld(interp); From cd452694d723a7ce458e4d901f356d1fa48fba11 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Mon, 28 Apr 2025 15:49:06 +1200 Subject: [PATCH 5/7] Update Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-02-26.gh-issue-129210.a5uLpE.rst Co-authored-by: Jelle Zijlstra --- .../2025-04-16-12-02-26.gh-issue-129210.a5uLpE.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-02-26.gh-issue-129210.a5uLpE.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-02-26.gh-issue-129210.a5uLpE.rst index e76de3433fd90c..85041c26c1b0e7 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-02-26.gh-issue-129210.a5uLpE.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-16-12-02-26.gh-issue-129210.a5uLpE.rst @@ -1,2 +1,2 @@ -Optimise garbage collection in case where objects to collect have +Optimise garbage collection in case where no objects to collect have finalizers, and hence cannot be resurrected. From 27fb67c347d583d3714a82aa4f06b4c9bb3fd03c Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Mon, 28 Apr 2025 15:49:16 +1200 Subject: [PATCH 6/7] Update Python/gc.c Co-authored-by: Jelle Zijlstra --- Python/gc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/gc.c b/Python/gc.c index daf1e3006859fc..198f881fac50dc 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1073,6 +1073,7 @@ handle_legacy_finalizers(PyThreadState *tstate, /* Run first-time finalizers (if any) on all the objects in collectable. * Note that this may remove some (or even all) of the objects from the * list, due to refcounts falling to 0. + * Return 1 if any finalizers were run. 
*/ static int finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) From 85f41b3fb22db140ab15224262d1610480aa5bab Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Mon, 28 Apr 2025 15:49:21 +1200 Subject: [PATCH 7/7] Update Python/gc_free_threading.c Co-authored-by: Jelle Zijlstra --- Python/gc_free_threading.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 1eecf61682eee7..e9a766fa338de9 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1592,6 +1592,7 @@ debug_cycle(const char *msg, PyObject *op) /* Run first-time finalizers (if any) on all the objects in collectable. * Note that this may remove some (or even all) of the objects from the * list, due to refcounts falling to 0. + * Return 1 if any finalizers were run. */ static int finalize_garbage(struct collection_state *state)