This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 602c8e0

Implement per thread heap profiling.
Rename data structures (prof_thr_cnt_t-->prof_tctx_t, prof_ctx_t-->prof_gctx_t),
and convert to storing a prof_tctx_t for sampled objects.

Convert PROF_ALLOC_PREP() to prof_alloc_prep(), since precise backtrace depth
within jemalloc functions is no longer an issue (pprof prunes irrelevant
frames).

Implement mallctls:
- prof.reset implements full sample data reset, and optional change of sample
  interval.
- prof.lg_sample reads the current sample interval (opt.lg_prof_sample was the
  permanent source of truth prior to prof.reset).
- thread.prof.name provides naming capability for threads within heap profile
  dumps.
- thread.prof.active makes it possible to activate/deactivate heap profiling
  for individual threads.

Modify the heap dump files to contain per thread heap profile data. This
change is incompatible with the existing pprof, which will require
enhancements to read and process the enriched data.
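As a quick orientation, here is a minimal sketch (not part of the commit) of
exercising the two new global mallctls through jemalloc's mallctl() interface.
It assumes a build configured with --enable-prof and profiling enabled at run
time (e.g. MALLOC_CONF=prof:true), and that prof.reset accepts a NULL new
value to reset without changing the interval, per the documentation added
below.

#include <stddef.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	size_t lg_sample, sz = sizeof(lg_sample);

	/* Read the current sample interval, expressed as log base 2. */
	if (mallctl("prof.lg_sample", &lg_sample, &sz, NULL, 0) == 0)
		printf("sample interval: 2^%zu bytes\n", lg_sample);

	/* Reset all sample data and move to a 2^20 byte sample interval. */
	size_t new_lg_sample = 20;
	if (mallctl("prof.reset", NULL, NULL, &new_lg_sample,
	    sizeof(new_lg_sample)) != 0)
		fprintf(stderr, "prof.reset failed\n");

	/* Reset again, this time keeping the current interval (no new value). */
	mallctl("prof.reset", NULL, NULL, NULL, 0);

	return (0);
}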
1 parent 1628e86 commit 602c8e0

File tree

11 files changed: +1217 -706 lines


doc/jemalloc.xml.in (+55 -1)

@@ -1047,7 +1047,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         <varlistentry id="opt.lg_prof_sample">
           <term>
             <mallctl>opt.lg_prof_sample</mallctl>
-            (<type>ssize_t</type>)
+            (<type>size_t</type>)
             <literal>r-</literal>
             [<option>--enable-prof</option>]
           </term>
@@ -1243,6 +1243,35 @@ malloc_conf = "xmalloc:true";]]></programlisting>
           the developer may find manual flushing useful.</para></listitem>
         </varlistentry>
 
+        <varlistentry id="thread.prof.name">
+          <term>
+            <mallctl>thread.prof.name</mallctl>
+            (<type>const char *</type>)
+            <literal>rw</literal>
+            [<option>--enable-prof</option>]
+          </term>
+          <listitem><para>Get/set the descriptive name associated with the calling
+          thread in memory profile dumps. An internal copy of the name string is
+          created, so the input string need not be maintained after this interface
+          completes execution. The output string of this interface should be
+          copied for non-ephemeral uses, because multiple implementation details
+          can cause asynchronous string deallocation.</para></listitem>
+        </varlistentry>
+
+        <varlistentry id="thread.prof.active">
+          <term>
+            <mallctl>thread.prof.active</mallctl>
+            (<type>bool</type>)
+            <literal>rw</literal>
+            [<option>--enable-prof</option>]
+          </term>
+          <listitem><para>Control whether sampling is currently active for the
+          calling thread. This is a deactivation mechanism in addition to <link
+          linkend="prof.active"><mallctl>prof.active</mallctl></link>; both must
+          be active for the calling thread to sample. This flag is enabled by
+          default.</para></listitem>
+        </varlistentry>
+
         <varlistentry id="arena.i.purge">
           <term>
             <mallctl>arena.&lt;i&gt;.purge</mallctl>
@@ -1492,6 +1521,31 @@ malloc_conf = "xmalloc:true";]]></programlisting>
           option.</para></listitem>
         </varlistentry>
 
+        <varlistentry id="prof.reset">
+          <term>
+            <mallctl>prof.reset</mallctl>
+            (<type>size_t</type>)
+            <literal>-w</literal>
+            [<option>--enable-prof</option>]
+          </term>
+          <listitem><para>Reset all memory profile statistics, and optionally
+          update the sample rate (see <link
+          linkend="opt.lg_prof_sample"><mallctl>opt.lg_prof_sample</mallctl></link>).
+          </para></listitem>
+        </varlistentry>
+
+        <varlistentry id="prof.lg_sample">
+          <term>
+            <mallctl>prof.lg_sample</mallctl>
+            (<type>size_t</type>)
+            <literal>r-</literal>
+            [<option>--enable-prof</option>]
+          </term>
+          <listitem><para>Get the sample rate (see <link
+          linkend="opt.lg_prof_sample"><mallctl>opt.lg_prof_sample</mallctl></link>).
+          </para></listitem>
+        </varlistentry>
+
         <varlistentry id="prof.interval">
           <term>
             <mallctl>prof.interval</mallctl>
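The two thread.prof.* entries above lend themselves to a short usage sketch.
The following is illustrative only (the helper names are hypothetical, not
from the commit); the copy in the read path follows the warning above that
the returned string can be deallocated asynchronously.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <jemalloc/jemalloc.h>

/* Hypothetical helper: label the calling thread and enable its sampling. */
void
thread_prof_setup(const char *name)
{
	bool active = true;

	/* jemalloc copies the name internally, so a stack string is fine. */
	if (mallctl("thread.prof.name", NULL, NULL, &name, sizeof(name)) != 0)
		fprintf(stderr, "thread.prof.name failed\n");

	/* Sampling occurs only if this flag AND prof.active are both true. */
	mallctl("thread.prof.active", NULL, NULL, &active, sizeof(active));
}

/* Hypothetical helper: read back the name, copying it immediately. */
void
thread_prof_name(char *buf, size_t buflen)
{
	const char *name;
	size_t sz = sizeof(name);

	buf[0] = '\0';
	if (mallctl("thread.prof.name", &name, &sz, NULL, 0) == 0) {
		/* Copy now; the returned string may be freed asynchronously. */
		strncpy(buf, name, buflen - 1);
		buf[buflen - 1] = '\0';
	}
}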

include/jemalloc/internal/arena.h (+11 -11)

@@ -58,7 +58,7 @@ typedef struct arena_s arena_t;
 struct arena_chunk_map_s {
 #ifndef JEMALLOC_PROF
 	/*
-	 * Overlay prof_ctx in order to allow it to be referenced by dead code.
+	 * Overlay prof_tctx in order to allow it to be referenced by dead code.
 	 * Such antics aren't warranted for per arena data structures, but
 	 * chunk map overhead accounts for a percentage of memory, rather than
 	 * being just a fixed cost.
@@ -75,7 +75,7 @@ struct arena_chunk_map_s {
 	rb_node(arena_chunk_map_t) rb_link;
 
 	/* Profile counters, used for large object runs. */
-	prof_ctx_t *prof_ctx;
+	prof_tctx_t *prof_tctx;
 #ifndef JEMALLOC_PROF
 }; /* union { ... }; */
 #endif
@@ -472,8 +472,8 @@ size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
 size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
 unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
     const void *ptr);
-prof_ctx_t *arena_prof_ctx_get(const void *ptr);
-void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+prof_tctx_t *arena_prof_tctx_get(const void *ptr);
+void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
 void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
 size_t arena_salloc(const void *ptr, bool demote);
 void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache);
@@ -987,10 +987,10 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
 	return (regind);
 }
 
-JEMALLOC_INLINE prof_ctx_t *
-arena_prof_ctx_get(const void *ptr)
+JEMALLOC_INLINE prof_tctx_t *
+arena_prof_tctx_get(const void *ptr)
 {
-	prof_ctx_t *ret;
+	prof_tctx_t *ret;
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
 
@@ -1003,15 +1003,15 @@ arena_prof_ctx_get(const void *ptr)
 	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
 	if ((mapbits & CHUNK_MAP_LARGE) == 0)
-		ret = (prof_ctx_t *)(uintptr_t)1U;
+		ret = (prof_tctx_t *)(uintptr_t)1U;
 	else
-		ret = arena_mapp_get(chunk, pageind)->prof_ctx;
+		ret = arena_mapp_get(chunk, pageind)->prof_tctx;
 
 	return (ret);
 }
 
 JEMALLOC_INLINE void
-arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
 {
 	arena_chunk_t *chunk;
 	size_t pageind;
@@ -1025,7 +1025,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 	assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
 
 	if (arena_mapbits_large_get(chunk, pageind) != 0)
-		arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
+		arena_mapp_get(chunk, pageind)->prof_tctx = tctx;
 }
 
 JEMALLOC_ALWAYS_INLINE void *
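One behavior of the converted getter is easy to miss: for small (non-large)
runs, arena_prof_tctx_get() returns the sentinel (prof_tctx_t *)(uintptr_t)1U
rather than real metadata, so callers cannot simply test against NULL. A
hypothetical caller sketch (not in this commit), assuming jemalloc's internal
header is available:

#include <stdbool.h>
#include <stdint.h>

#include "jemalloc/internal/jemalloc_internal.h"

/*
 * Hypothetical predicate: true iff ptr refers to a sampled arena allocation
 * with real prof_tctx_t metadata attached.
 */
static bool
ptr_is_sampled(const void *ptr)
{
	prof_tctx_t *tctx = arena_prof_tctx_get(ptr);

	/*
	 * Unsampled small-run objects yield the (uintptr_t)1U sentinel, so
	 * only values above 1 point at genuine per thread counters.
	 */
	return ((uintptr_t)tctx > (uintptr_t)1U);
}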

include/jemalloc/internal/extent.h (+1 -1)

@@ -16,7 +16,7 @@ struct extent_node_s {
 	rb_node(extent_node_t) link_ad;
 
 	/* Profile counters, used for huge objects. */
-	prof_ctx_t *prof_ctx;
+	prof_tctx_t *prof_tctx;
 
 	/* Pointer to the extent that this tree node is responsible for. */
 	void *addr;

include/jemalloc/internal/huge.h (+2 -2)

@@ -21,8 +21,8 @@ extern huge_dalloc_junk_t *huge_dalloc_junk;
 #endif
 void huge_dalloc(void *ptr);
 size_t huge_salloc(const void *ptr);
-prof_ctx_t *huge_prof_ctx_get(const void *ptr);
-void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+prof_tctx_t *huge_prof_tctx_get(const void *ptr);
+void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
 bool huge_boot(void);
 void huge_prefork(void);
 void huge_postfork_parent(void);

include/jemalloc/internal/private_symbols.txt (+14 -7)

@@ -48,9 +48,9 @@ arena_prefork
 arena_prof_accum
 arena_prof_accum_impl
 arena_prof_accum_locked
-arena_prof_ctx_get
-arena_prof_ctx_set
 arena_prof_promoted
+arena_prof_tctx_get
+arena_prof_tctx_set
 arena_ptr_small_binind_get
 arena_purge_all
 arena_quarantine_junk_small
@@ -208,8 +208,8 @@ huge_palloc
 huge_postfork_child
 huge_postfork_parent
 huge_prefork
-huge_prof_ctx_get
-huge_prof_ctx_set
+huge_prof_tctx_get
+huge_prof_tctx_set
 huge_ralloc
 huge_ralloc_no_move
 huge_salloc
@@ -287,28 +287,31 @@ opt_zero
 p2rz
 pages_purge
 pow2_ceil
+prof_alloc_prep
 prof_backtrace
 prof_boot0
 prof_boot1
 prof_boot2
 prof_bt_count
-prof_ctx_get
-prof_ctx_set
 prof_dump_open
 prof_free
+prof_free_sampled_object
 prof_gdump
 prof_idump
 prof_interval
 prof_lookup
 prof_malloc
-prof_malloc_record_object
+prof_malloc_sample_object
 prof_mdump
 prof_postfork_child
 prof_postfork_parent
 prof_prefork
 prof_realloc
+prof_reset
 prof_sample_accum_update
 prof_sample_threshold_update
+prof_tctx_get
+prof_tctx_set
 prof_tdata_booted
 prof_tdata_cleanup
 prof_tdata_get
@@ -322,6 +325,10 @@ prof_tdata_tsd_get
 prof_tdata_tsd_get_wrapper
 prof_tdata_tsd_init_head
 prof_tdata_tsd_set
+prof_thread_active_get
+prof_thread_active_set
+prof_thread_name_get
+prof_thread_name_set
 quarantine
 quarantine_alloc_hook
 quarantine_boot
