Skip to content

Commit 66959ca

Browse files
committed
gh-114746: Avoid quadratic behavior in free-threaded GC
The free-threaded build's GC implementation is non-generational, but it was scheduled as if it were collecting a young generation, leading to quadratic behavior. This change increases the minimum threshold and scales it to the number of live objects, as we do for the old generation in the default build. Note that the scheduling is still not thread-safe without the GIL; the changes needed to make it thread-safe will come in later PRs. A few tests, like "test_sneaky_frame_object", rely on prompt scheduling of the GC. For now, to keep those tests passing, we disable the scaled threshold after calls like `gc.set_threshold(1, 0, 0)`.
1 parent b905fad commit 66959ca

File tree

1 file changed

+29
-73
lines changed

1 file changed

+29
-73
lines changed

Python/gc_free_threading.c

+29-73
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ struct collection_state {
4646
GCState *gcstate;
4747
Py_ssize_t collected;
4848
Py_ssize_t uncollectable;
49+
Py_ssize_t long_lived_total;
4950
struct worklist unreachable;
5051
struct worklist legacy_finalizers;
5152
struct worklist wrcb_to_call;
@@ -443,7 +444,7 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
443444
else {
444445
// object is reachable, restore `ob_tid`; we're done with these objects
445446
gc_restore_tid(op);
446-
state->gcstate->long_lived_total++;
447+
state->long_lived_total++;
447448
}
448449

449450
return true;
@@ -605,6 +606,8 @@ get_gc_state(void)
605606
void
606607
_PyGC_InitState(GCState *gcstate)
607608
{
609+
// TODO: move to pycore_runtime_init.h once the incremental GC lands.
610+
gcstate->generations[0].threshold = 2000;
608611
}
609612

610613

@@ -885,62 +888,6 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase,
885888
assert(!_PyErr_Occurred(tstate));
886889
}
887890

888-
889-
/* Find the oldest generation (highest numbered) where the count
890-
* exceeds the threshold. Objects in the that generation and
891-
* generations younger than it will be collected. */
892-
static int
893-
gc_select_generation(GCState *gcstate)
894-
{
895-
for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
896-
if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
897-
/* Avoid quadratic performance degradation in number
898-
of tracked objects (see also issue #4074):
899-
900-
To limit the cost of garbage collection, there are two strategies;
901-
- make each collection faster, e.g. by scanning fewer objects
902-
- do less collections
903-
This heuristic is about the latter strategy.
904-
905-
In addition to the various configurable thresholds, we only trigger a
906-
full collection if the ratio
907-
908-
long_lived_pending / long_lived_total
909-
910-
is above a given value (hardwired to 25%).
911-
912-
The reason is that, while "non-full" collections (i.e., collections of
913-
the young and middle generations) will always examine roughly the same
914-
number of objects -- determined by the aforementioned thresholds --,
915-
the cost of a full collection is proportional to the total number of
916-
long-lived objects, which is virtually unbounded.
917-
918-
Indeed, it has been remarked that doing a full collection every
919-
<constant number> of object creations entails a dramatic performance
920-
degradation in workloads which consist in creating and storing lots of
921-
long-lived objects (e.g. building a large list of GC-tracked objects would
922-
show quadratic performance, instead of linear as expected: see issue #4074).
923-
924-
Using the above ratio, instead, yields amortized linear performance in
925-
the total number of objects (the effect of which can be summarized
926-
thusly: "each full garbage collection is more and more costly as the
927-
number of objects grows, but we do fewer and fewer of them").
928-
929-
This heuristic was suggested by Martin von Löwis on python-dev in
930-
June 2008. His original analysis and proposal can be found at:
931-
http://mail.python.org/pipermail/python-dev/2008-June/080579.html
932-
*/
933-
if (i == NUM_GENERATIONS - 1
934-
&& gcstate->long_lived_pending < gcstate->long_lived_total / 4)
935-
{
936-
continue;
937-
}
938-
return i;
939-
}
940-
}
941-
return -1;
942-
}
943-
944891
static void
945892
cleanup_worklist(struct worklist *worklist)
946893
{
@@ -952,6 +899,21 @@ cleanup_worklist(struct worklist *worklist)
952899
}
953900
}
954901

902+
static bool
903+
gc_should_collect(GCState *gcstate)
904+
{
905+
int count = _Py_atomic_load_int_relaxed(&gcstate->generations[0].count);
906+
int threshold = gcstate->generations[0].threshold;
907+
if (count <= threshold || threshold == 0 || !gcstate->enabled) {
908+
return false;
909+
}
910+
// Avoid quadratic behavior by scaling threshold to the number of live
911+
// objects. A few tests rely on immediate scheduling of the GC so we ignore
912+
// the scaled threshold if generations[1].threshold is set to zero.
913+
return (count > gcstate->long_lived_total / 4 ||
914+
gcstate->generations[1].threshold == 0);
915+
}
916+
955917
static void
956918
gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
957919
{
@@ -1029,15 +991,10 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
1029991
return 0;
1030992
}
1031993

1032-
if (generation == GENERATION_AUTO) {
1033-
// Select the oldest generation that needs collecting. We will collect
1034-
// objects from that generation and all generations younger than it.
1035-
generation = gc_select_generation(gcstate);
1036-
if (generation < 0) {
1037-
// No generation needs to be collected.
1038-
_Py_atomic_store_int(&gcstate->collecting, 0);
1039-
return 0;
1040-
}
994+
if (reason == _Py_GC_REASON_HEAP && !gc_should_collect(gcstate)) {
995+
// Don't collect if the threshold is not exceeded.
996+
_Py_atomic_store_int(&gcstate->collecting, 0);
997+
return 0;
1041998
}
1042999

10431000
assert(generation >= 0 && generation < NUM_GENERATIONS);
@@ -1082,6 +1039,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
10821039

10831040
m = state.collected;
10841041
n = state.uncollectable;
1042+
gcstate->long_lived_total = state.long_lived_total;
10851043

10861044
if (gcstate->debug & _PyGC_DEBUG_STATS) {
10871045
double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
@@ -1523,12 +1481,10 @@ _PyObject_GC_Link(PyObject *op)
15231481
{
15241482
PyThreadState *tstate = _PyThreadState_GET();
15251483
GCState *gcstate = &tstate->interp->gc;
1526-
gcstate->generations[0].count++; /* number of allocated GC objects */
1527-
if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
1528-
gcstate->enabled &&
1529-
gcstate->generations[0].threshold &&
1530-
!_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
1531-
!_PyErr_Occurred(tstate))
1484+
gcstate->generations[0].count++;
1485+
1486+
if (gc_should_collect(gcstate) &&
1487+
!_Py_atomic_load_int_relaxed(&gcstate->collecting))
15321488
{
15331489
_Py_ScheduleGC(tstate->interp);
15341490
}
@@ -1537,7 +1493,7 @@ _PyObject_GC_Link(PyObject *op)
15371493
void
15381494
_Py_RunGC(PyThreadState *tstate)
15391495
{
1540-
gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
1496+
gc_collect_main(tstate, 0, _Py_GC_REASON_HEAP);
15411497
}
15421498

15431499
static PyObject *

0 commit comments

Comments
 (0)