Skip to content

Commit b527fbc

Browse files
d-netto authored and nickrobinson251 committed
expose metric to report reasons why full GCs were triggered (JuliaLang#55826) (#189)
Adds an additional GC observability tool: a counter of how many full GCs were triggered for each reason. This will help us diagnose why some of our servers trigger so many full GCs in certain circumstances.
1 parent 9414a74 commit b527fbc

File tree

5 files changed

+67
-1
lines changed

5 files changed

+67
-1
lines changed

base/timing.jl

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,39 @@ function gc_page_utilization_data()
107107
return Base.unsafe_wrap(Array, page_utilization_raw, JL_GC_N_MAX_POOLS, own=false)
108108
end
109109

110+
# must be kept in sync with `src/gc.h`
111+
const FULL_SWEEP_REASONS = [:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL, :FULL_SWEEP_REASON_FORCED_FULL_SWEEP,
112+
:FULL_SWEEP_REASON_ALLOCATION_INTERVAL_ABOVE_MAXMEM, :FULL_SWEEP_REASON_LIVE_BYTES_ABOVE_MAX_TOTAL_MEMORY,
113+
:FULL_SWEEP_REASON_LARGE_INTERGEN_FRONTIER]
114+
115+
"""
116+
Base.full_sweep_reasons()
117+
118+
Return a dictionary of the number of times each full sweep reason has occurred.
119+
120+
The reasons are:
121+
- `:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL`: Full sweep was caused by `always_full` being set in the GC debug environment
122+
- `:FULL_SWEEP_REASON_FORCED_FULL_SWEEP`: Full sweep was forced by `GC.gc(true)`
123+
- `:FULL_SWEEP_REASON_ALLOCATION_INTERVAL_ABOVE_MAXMEM`: Full sweep was forced by the allocation interval being above the total
124+
memory in the machine (as returned by LibUV) divided by the number of mutator threads
125+
- `:FULL_SWEEP_REASON_LIVE_BYTES_ABOVE_MAX_TOTAL_MEMORY`: Full sweep was caused by live bytes being above the
126+
soft heap limit size (which is either automatically computed at initialization based on the total memory provided by LibUV,
127+
or set by the user via `--heap-size-hint`)
128+
- `:FULL_SWEEP_REASON_LARGE_INTERGEN_FRONTIER`: Full sweep was forced by the intergenerational frontier being too large
129+
(i.e. too many pointers in the remembered set)
130+
131+
Note that the set of reasons is not guaranteed to be stable across minor versions of Julia.
132+
"""
133+
function full_sweep_reasons()
134+
reason = cglobal(:jl_full_sweep_reasons, UInt64)
135+
reasons_as_array = Base.unsafe_wrap(Vector{UInt64}, reason, length(FULL_SWEEP_REASONS), own=false)
136+
d = Dict{Symbol, Int64}()
137+
for (i, r) in enumerate(FULL_SWEEP_REASONS)
138+
d[r] = reasons_as_array[i]
139+
end
140+
return d
141+
end
142+
110143
"""
111144
Base.jit_total_bytes()
112145

src/gc.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ uv_sem_t gc_sweep_assists_needed;
3737
uv_mutex_t gc_queue_observer_lock;
3838
// Tag for sentinel nodes in bigval list
3939
uintptr_t gc_bigval_sentinel_tag;
40+
// Table recording number of full GCs due to each reason
41+
JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];
4042

4143
// Linked list of callback functions
4244

@@ -3551,6 +3553,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
35513553
if (large_frontier) {
35523554
sweep_full = 1;
35533555
gc_num.interval = last_long_collect_interval;
3556+
gc_count_full_sweep_reason(FULL_SWEEP_REASON_LARGE_INTERGEN_FRONTIER);
35543557
}
35553558
if (not_freed_enough || large_frontier) {
35563559
gc_num.interval = gc_num.interval * 2;
@@ -3566,6 +3569,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
35663569
if (gc_num.interval > maxmem) {
35673570
sweep_full = 1;
35683571
gc_num.interval = maxmem;
3572+
gc_count_full_sweep_reason(FULL_SWEEP_REASON_ALLOCATION_INTERVAL_ABOVE_MAXMEM);
35693573
}
35703574
}
35713575

@@ -3574,13 +3578,16 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
35743578
if (live_bytes > max_total_memory) {
35753579
under_memory_pressure = 1;
35763580
sweep_full = 1;
3581+
gc_count_full_sweep_reason(FULL_SWEEP_REASON_LIVE_BYTES_ABOVE_MAX_TOTAL_MEMORY);
35773582
}
35783583
if (gc_sweep_always_full) {
35793584
sweep_full = 1;
3585+
gc_count_full_sweep_reason(FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL);
35803586
}
35813587
if (collection == JL_GC_FULL && !prev_sweep_full) {
35823588
sweep_full = 1;
35833589
recollect = 1;
3590+
gc_count_full_sweep_reason(FULL_SWEEP_REASON_FORCED_FULL_SWEEP);
35843591
}
35853592
if (sweep_full) {
35863593
// these are the difference between the number of gc-perm bytes scanned

src/gc.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,21 @@ FORCE_INLINE void gc_big_object_link(bigval_t *sentinel_node, bigval_t *node) JL
560560
sentinel_node->next = node;
561561
}
562562

563+
// Must be kept in sync with `base/timing.jl`
564+
#define FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL (0)
565+
#define FULL_SWEEP_REASON_FORCED_FULL_SWEEP (1)
566+
#define FULL_SWEEP_REASON_ALLOCATION_INTERVAL_ABOVE_MAXMEM (2)
567+
#define FULL_SWEEP_REASON_LIVE_BYTES_ABOVE_MAX_TOTAL_MEMORY (3)
568+
#define FULL_SWEEP_REASON_LARGE_INTERGEN_FRONTIER (4)
569+
#define FULL_SWEEP_NUM_REASONS (5)
570+
571+
extern JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];
572+
STATIC_INLINE void gc_count_full_sweep_reason(int reason) JL_NOTSAFEPOINT
573+
{
574+
assert(reason >= 0 && reason < FULL_SWEEP_NUM_REASONS);
575+
jl_full_sweep_reasons[reason]++;
576+
}
577+
563578
extern uv_mutex_t gc_threads_lock;
564579
extern uv_cond_t gc_threads_cond;
565580
extern uv_sem_t gc_sweep_assists_needed;

src/threading.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1035,7 +1035,7 @@ JL_DLLEXPORT int jl_heartbeat_resume(void)
10351035
if (uv_sem_trywait(&heartbeat_off_sem) != 0) {
10361036
return -1;
10371037
}
1038-
1038+
10391039
// reset state as we've been paused
10401040
n_hbs_missed = 0;
10411041
n_hbs_recvd = 0;

test/gc.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,13 @@ function run_pg_size_test()
3030
@test page_size == (1 << 12) || page_size == (1 << 14)
3131
end
3232

33+
function full_sweep_reasons_test()
34+
GC.gc()
35+
reasons = Base.full_sweep_reasons()
36+
@test reasons[:FULL_SWEEP_REASON_FORCED_FULL_SWEEP] >= 1
37+
@test keys(reasons) == Set(Base.FULL_SWEEP_REASONS)
38+
end
39+
3340
# !!! note:
3441
# Since we run our tests on 32bit OS as well we confine ourselves
3542
# to parameters that allocate about 512MB of objects. Max RSS is lower
@@ -59,3 +66,7 @@ GC.enable(true); GC.gc(false) # incremental collection
5966
run_nonzero_page_utilization_test()
6067
run_pg_size_test()
6168
end
69+
70+
@testset "Full GC reasons" begin
71+
full_sweep_reasons_test()
72+
end

0 commit comments

Comments
 (0)