@@ -46,6 +46,7 @@ struct collection_state {
     GCState *gcstate;
     Py_ssize_t collected;
     Py_ssize_t uncollectable;
+    Py_ssize_t long_lived_total;
    struct worklist unreachable;
    struct worklist legacy_finalizers;
    struct worklist wrcb_to_call;
@@ -443,7 +444,7 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
     else {
         // object is reachable, restore `ob_tid`; we're done with these objects
         gc_restore_tid(op);
-        state->gcstate->long_lived_total++;
+        state->long_lived_total++;
     }
 
     return true;
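
Annotation: the counter move above pairs with a later hunk that assigns `state.long_lived_total` back to `gcstate->long_lived_total` once the collection finishes. The visitor accumulates into private, per-collection state, and the shared GC state only ever sees the completed total. A minimal standalone sketch of that accumulate-then-publish pattern (the struct layouts are illustrative stand-ins, not CPython's actual definitions):

```c
#include <stddef.h>
#include <stdio.h>

struct gc_state { size_t long_lived_total; };    /* shared, long-lived */
struct collection_state {                        /* scratch, per collection */
    struct gc_state *gcstate;
    size_t long_lived_total;
};

/* Per-object visitor: bump the private counter, not the shared one. */
static void
visit_reachable(struct collection_state *state)
{
    state->long_lived_total++;
}

int
main(void)
{
    struct gc_state gc = {0};
    struct collection_state state = { .gcstate = &gc };
    for (int i = 0; i < 5; i++) {
        visit_reachable(&state);
    }
    gc.long_lived_total = state.long_lived_total;   /* publish once, at the end */
    printf("%zu\n", gc.long_lived_total);           /* prints 5 */
    return 0;
}
```

One plausible motivation is that the shared total is only meaningful after the scan completes, so recomputing it from scratch into private state and publishing it once avoids ever exposing a partially computed value.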
@@ -605,6 +606,8 @@ get_gc_state(void)
 void
 _PyGC_InitState(GCState *gcstate)
 {
+    // TODO: move to pycore_runtime_init.h once the incremental GC lands.
+    gcstate->generations[0].threshold = 2000;
 }
 
 
@@ -885,62 +888,6 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase,
     assert(!_PyErr_Occurred(tstate));
 }
 
-
-/* Find the oldest generation (highest numbered) where the count
- * exceeds the threshold. Objects in the that generation and
- * generations younger than it will be collected. */
-static int
-gc_select_generation(GCState *gcstate)
-{
-    for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
-        if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
-            /* Avoid quadratic performance degradation in number
-               of tracked objects (see also issue #4074):
-
-               To limit the cost of garbage collection, there are two strategies;
-                 - make each collection faster, e.g. by scanning fewer objects
-                 - do less collections
-               This heuristic is about the latter strategy.
-
-               In addition to the various configurable thresholds, we only trigger a
-               full collection if the ratio
-
-                long_lived_pending / long_lived_total
-
-               is above a given value (hardwired to 25%).
-
-               The reason is that, while "non-full" collections (i.e., collections of
-               the young and middle generations) will always examine roughly the same
-               number of objects -- determined by the aforementioned thresholds --,
-               the cost of a full collection is proportional to the total number of
-               long-lived objects, which is virtually unbounded.
-
-               Indeed, it has been remarked that doing a full collection every
-               <constant number> of object creations entails a dramatic performance
-               degradation in workloads which consist in creating and storing lots of
-               long-lived objects (e.g. building a large list of GC-tracked objects would
-               show quadratic performance, instead of linear as expected: see issue #4074).
-
-               Using the above ratio, instead, yields amortized linear performance in
-               the total number of objects (the effect of which can be summarized
-               thusly: "each full garbage collection is more and more costly as the
-               number of objects grows, but we do fewer and fewer of them").
-
-               This heuristic was suggested by Martin von Löwis on python-dev in
-               June 2008. His original analysis and proposal can be found at:
-               http://mail.python.org/pipermail/python-dev/2008-June/080579.html
-            */
-            if (i == NUM_GENERATIONS - 1
-                && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
-            {
-                continue;
-            }
-            return i;
-        }
-    }
-    return -1;
-}
-
 static void
 cleanup_worklist(struct worklist *worklist)
 {
@@ -952,6 +899,21 @@ cleanup_worklist(struct worklist *worklist)
     }
 }
 
+static bool
+gc_should_collect(GCState *gcstate)
+{
+    int count = _Py_atomic_load_int_relaxed(&gcstate->generations[0].count);
+    int threshold = gcstate->generations[0].threshold;
+    if (count <= threshold || threshold == 0 || !gcstate->enabled) {
+        return false;
+    }
+    // Avoid quadratic behavior by scaling threshold to the number of live
+    // objects. A few tests rely on immediate scheduling of the GC so we ignore
+    // the scaled threshold if generations[1].threshold is set to zero.
+    return (count > gcstate->long_lived_total / 4 ||
+            gcstate->generations[1].threshold == 0);
+}
+
 static void
 gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
 {
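
Annotation: the arithmetic behind the scaled threshold is easiest to see with concrete numbers. The standalone sketch below mirrors the logic of `gc_should_collect()` (minus the atomic load) with made-up heap sizes: given the base threshold of 2000 set in `_PyGC_InitState()`, a heap holding a million long-lived objects defers collection until more than 250,000 new allocations are pending, while a small heap collects as soon as the base threshold is crossed.

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the GCState fields used by gc_should_collect();
 * the names and layout are illustrative, not CPython's actual structs. */
struct gen { int count; int threshold; };
struct gc_state {
    bool enabled;
    long long long_lived_total;
    struct gen generations[3];
};

/* Mirrors the logic added in this diff, minus the atomics. */
static bool
should_collect(struct gc_state *gc)
{
    int count = gc->generations[0].count;
    int threshold = gc->generations[0].threshold;
    if (count <= threshold || threshold == 0 || !gc->enabled) {
        return false;
    }
    /* Scale the trigger with the number of live objects so that
     * collections stay amortized-linear overall. */
    return (count > gc->long_lived_total / 4 ||
            gc->generations[1].threshold == 0);
}

int
main(void)
{
    struct gc_state gc = { .enabled = true };
    gc.generations[0].threshold = 2000;
    gc.generations[1].threshold = 10;       /* non-zero: scaling active */

    gc.long_lived_total = 1000000;          /* large, long-lived heap */
    gc.generations[0].count = 3000;         /* above base threshold... */
    printf("%d\n", should_collect(&gc));    /* 0: deferred by scaling */

    gc.generations[0].count = 250001;       /* > long_lived_total / 4 */
    printf("%d\n", should_collect(&gc));    /* 1: collect now */

    gc.long_lived_total = 0;                /* small heap */
    gc.generations[0].count = 2001;
    printf("%d\n", should_collect(&gc));    /* 1: base threshold alone */
    return 0;
}
```

This appears to be the same amortized-linear idea as the removed `long_lived_pending / long_lived_total` 25% rule, applied directly to the single remaining allocation count rather than to a middle-generation promotion counter.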
@@ -1029,15 +991,10 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
         return 0;
     }
 
-    if (generation == GENERATION_AUTO) {
-        // Select the oldest generation that needs collecting. We will collect
-        // objects from that generation and all generations younger than it.
-        generation = gc_select_generation(gcstate);
-        if (generation < 0) {
-            // No generation needs to be collected.
-            _Py_atomic_store_int(&gcstate->collecting, 0);
-            return 0;
-        }
+    if (reason == _Py_GC_REASON_HEAP && !gc_should_collect(gcstate)) {
+        // Don't collect if the threshold is not exceeded.
+        _Py_atomic_store_int(&gcstate->collecting, 0);
+        return 0;
     }
 
     assert(generation >= 0 && generation < NUM_GENERATIONS);
@@ -1082,6 +1039,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
 
     m = state.collected;
     n = state.uncollectable;
+    gcstate->long_lived_total = state.long_lived_total;
 
     if (gcstate->debug & _PyGC_DEBUG_STATS) {
         double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
@@ -1523,12 +1481,10 @@ _PyObject_GC_Link(PyObject *op)
 {
     PyThreadState *tstate = _PyThreadState_GET();
     GCState *gcstate = &tstate->interp->gc;
-    gcstate->generations[0].count++; /* number of allocated GC objects */
-    if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
-        gcstate->enabled &&
-        gcstate->generations[0].threshold &&
-        !_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
-        !_PyErr_Occurred(tstate))
+    gcstate->generations[0].count++;
+
+    if (gc_should_collect(gcstate) &&
+        !_Py_atomic_load_int_relaxed(&gcstate->collecting))
     {
         _Py_ScheduleGC(tstate->interp);
     }
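
Annotation: note the division of labor here. The allocation path only requests a collection via `_Py_ScheduleGC()`; the interpreter later runs it by calling `_Py_RunGC()` (next hunk) once the eval loop notices the request. A minimal sketch of that request/run split; the flag and function names are illustrative stand-ins for CPython's eval-breaker machinery, which does not appear in this diff:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the interpreter's "GC requested" bit. */
static atomic_bool gc_requested;

static void
schedule_gc(void)                 /* analogue of _Py_ScheduleGC() */
{
    atomic_store_explicit(&gc_requested, true, memory_order_release);
}

static void
run_gc_if_requested(void)         /* analogue of the eval loop + _Py_RunGC() */
{
    if (atomic_exchange_explicit(&gc_requested, false,
                                 memory_order_acq_rel)) {
        printf("collecting generation 0 (heap-triggered)\n");
    }
}

int
main(void)
{
    schedule_gc();                /* allocation path crossed the threshold */
    run_gc_if_requested();        /* next eval-breaker check runs the GC */
    run_gc_if_requested();        /* nothing pending the second time */
    return 0;
}
```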
@@ -1537,7 +1493,7 @@ _PyObject_GC_Link(PyObject *op)
 void
 _Py_RunGC(PyThreadState *tstate)
 {
-    gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
+    gc_collect_main(tstate, 0, _Py_GC_REASON_HEAP);
 }
 
 static PyObject *