This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 3a81cbd

Dump heap profile backtraces in a stable order.
Also iterate over per-thread stats in a stable order, which prepares the way for stable ordering of per-thread heap profile dumps.
1 parent ab532e9 commit 3a81cbd

File tree

2 files changed: +119 −62 lines


include/jemalloc/internal/prof.h (+14 −10)
@@ -1,6 +1,7 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
+typedef uint64_t prof_thr_uid_t;
 typedef struct prof_bt_s prof_bt_t;
 typedef struct prof_cnt_s prof_cnt_t;
 typedef struct prof_thr_cnt_s prof_thr_cnt_t;
@@ -81,15 +82,17 @@ struct prof_cnt_s {
 };
 
 struct prof_thr_cnt_s {
-	/* Linkage into prof_ctx_t's cnts_ql. */
-	ql_elm(prof_thr_cnt_t)	cnts_link;
+	prof_thr_uid_t		thr_uid;
+
+	/* Linkage into prof_ctx_t's thr_cnts. */
+	rb_node(prof_thr_cnt_t)	thr_cnt_link;
 
 	/*
 	 * Associated context. If a thread frees an object that it did not
-	 * allocate, it is possible that the context is not cached in the
+	 * allocate, it is possible that the context is not present in the
	 * thread's hash table, in which case it must be able to look up the
	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
-	 * and link it into the prof_ctx_t's cnts_ql.
+	 * and link it into the prof_ctx_t's thr_cnts.
	 */
 	prof_ctx_t		*ctx;
 
@@ -113,9 +116,10 @@ struct prof_thr_cnt_s {
 	/* Profiling counters. */
 	prof_cnt_t		cnts;
 };
+typedef rb_tree(prof_thr_cnt_t) prof_thr_cnt_tree_t;
 
 struct prof_ctx_s {
-	/* Protects nlimbo, cnt_merged, and cnts_ql. */
+	/* Protects nlimbo, cnt_merged, and thr_cnts. */
 	malloc_mutex_t		*lock;
 
 	/*
@@ -136,21 +140,21 @@ struct prof_ctx_s {
 	prof_cnt_t		cnt_merged;
 
 	/*
-	 * List of profile counters, one for each thread that has allocated in
+	 * Tree of profile counters, one for each thread that has allocated in
	 * this context.
	 */
-	ql_head(prof_thr_cnt_t) cnts_ql;
+	prof_thr_cnt_tree_t	thr_cnts;
 
-	/* Linkage for list of contexts to be dumped. */
-	ql_elm(prof_ctx_t)	dump_link;
+	/* Linkage for tree of contexts to be dumped. */
+	rb_node(prof_ctx_t)	dump_link;
 
 	/* Associated backtrace. */
 	prof_bt_t		bt;
 
 	/* Backtrace vector, variable size, referred to by bt. */
 	void			*vec[1];
 };
-typedef ql_head(prof_ctx_t) prof_ctx_list_t;
+typedef rb_tree(prof_ctx_t) prof_ctx_tree_t;
 
 struct prof_tdata_s {
 	/*
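For context, rb_node() and rb_tree() are jemalloc's intrusive red-black tree macros (include/jemalloc/internal/rb.h): the per-node linkage is embedded in the element type itself, and rb_gen() expands to a typed set of tree functions named after a caller-chosen prefix. A minimal sketch of the pattern, using a hypothetical node_t that is not part of this commit (the diff's `static UNUSED` attribute just silences warnings for generated functions that go uncalled):

#include "jemalloc/internal/rb.h"

typedef struct node_s node_t;
struct node_s {
	rb_node(node_t)	link;	/* Embedded linkage, like thr_cnt_link above. */
	int		key;
};
typedef rb_tree(node_t) node_tree_t;

static int
node_comp(const node_t *a, const node_t *b)
{

	/* Overflow-safe three-way comparison: -1, 0, or 1. */
	return ((a->key > b->key) - (a->key < b->key));
}

/*
 * Generates node_tree_new(), node_tree_insert(), node_tree_remove(),
 * node_tree_first(), node_tree_next(), node_tree_iter(), and friends,
 * all visiting nodes in node_comp() order.
 */
rb_gen(static, node_tree_, node_tree_t, node_t, link, node_comp)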
src/prof.c (+105 −52)
@@ -77,6 +77,33 @@ static bool prof_booted = false;
 
 /******************************************************************************/
 
+JEMALLOC_INLINE_C int
+prof_thr_cnt_comp(const prof_thr_cnt_t *a, const prof_thr_cnt_t *b)
+{
+	prof_thr_uid_t a_uid = a->thr_uid;
+	prof_thr_uid_t b_uid = b->thr_uid;
+
+	return ((a_uid > b_uid) - (a_uid < b_uid));
+}
+
+rb_gen(static UNUSED, thr_cnt_tree_, prof_thr_cnt_tree_t, prof_thr_cnt_t,
+    thr_cnt_link, prof_thr_cnt_comp)
+
+JEMALLOC_INLINE_C int
+prof_ctx_comp(const prof_ctx_t *a, const prof_ctx_t *b)
+{
+	unsigned a_len = a->bt.len;
+	unsigned b_len = b->bt.len;
+	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
+	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
+	if (ret == 0)
+		ret = (a_len > b_len) - (a_len < b_len);
+	return (ret);
+}
+
+rb_gen(static UNUSED, ctx_tree_, prof_ctx_tree_t, prof_ctx_t, dump_link,
+    prof_ctx_comp)
+
 void
 bt_init(prof_bt_t *bt, void **vec)
 {
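These comparators are what makes the dump order stable: per-thread counters sort by thr_uid, and contexts sort by backtrace, comparing the address vectors lexicographically and breaking ties by length. A self-contained sketch of the same ordering rule, using standard qsort() rather than the red-black tree, with made-up addresses (illustration only, not part of the commit):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for prof_bt_t: a vector of return addresses. */
typedef struct {
	void **vec;
	unsigned len;
} bt_t;

/* Same rule as prof_ctx_comp() above. */
static int
bt_comp(const void *a_, const void *b_)
{
	const bt_t *a = a_;
	const bt_t *b = b_;
	unsigned comp_len = (a->len < b->len) ? a->len : b->len;
	int ret = memcmp(a->vec, b->vec, comp_len * sizeof(void *));

	if (ret == 0)
		ret = (a->len > b->len) - (a->len < b->len);
	return (ret);
}

int
main(void)
{
	void *v1[] = {(void *)0x30, (void *)0x10};
	void *v2[] = {(void *)0x30};
	void *v3[] = {(void *)0x20, (void *)0x90};
	bt_t bts[] = {{v1, 2}, {v2, 1}, {v3, 2}};

	/* Whatever the insertion order, bt_comp() yields one order. */
	qsort(bts, sizeof(bts) / sizeof(bts[0]), sizeof(bts[0]), bt_comp);
	for (unsigned i = 0; i < 3; i++)
		printf("len=%u first=%p\n", bts[i].len, bts[i].vec[0]);
	return (0);
}

memcmp() compares the raw bytes of the pointer values, so the resulting order is platform-dependent but deterministic, which is all a stable dump requires.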
@@ -369,9 +396,8 @@ prof_ctx_create(prof_bt_t *bt)
 	 * prof_ctx_merge()/prof_ctx_destroy().
	 */
 	ctx->nlimbo = 1;
-	ql_elm_new(ctx, dump_link);
 	memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
-	ql_new(&ctx->cnts_ql);
+	thr_cnt_tree_new(&ctx->thr_cnts);
 	/* Duplicate bt. */
 	memcpy(ctx->vec, bt->vec, bt->len * sizeof(void *));
 	ctx->bt.vec = ctx->vec;
@@ -397,8 +423,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 	assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
 	prof_enter(prof_tdata);
 	malloc_mutex_lock(ctx->lock);
-	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
-	    ctx->nlimbo == 1) {
+	if (thr_cnt_tree_first(&ctx->thr_cnts) == NULL &&
+	    ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 1) {
 		assert(ctx->cnt_merged.curbytes == 0);
 		assert(ctx->cnt_merged.accumobjs == 0);
 		assert(ctx->cnt_merged.accumbytes == 0);
@@ -433,9 +459,9 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
 	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
 	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
 	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
-	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
-	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
-	    ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
+	thr_cnt_tree_remove(&ctx->thr_cnts, cnt);
+	if (opt_prof_accum == false && thr_cnt_tree_first(&ctx->thr_cnts) ==
+	    NULL && ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
 		/*
 		 * Increment ctx->nlimbo in order to keep another thread from
 		 * winning the race to destroy ctx while this one has ctx->lock
@@ -540,7 +566,6 @@ prof_lookup(prof_bt_t *bt)
 			prof_ctx_destroy(ctx);
 			return (NULL);
 		}
-		ql_elm_new(ret.p, cnts_link);
 		ret.p->ctx = ctx;
 		ret.p->epoch = 0;
 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
@@ -551,7 +576,7 @@ prof_lookup(prof_bt_t *bt)
 			return (NULL);
 		}
 		malloc_mutex_lock(ctx->lock);
-		ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link);
+		thr_cnt_tree_insert(&ctx->thr_cnts, ret.p);
 		ctx->nlimbo--;
 		malloc_mutex_unlock(ctx->lock);
 	}
@@ -745,12 +770,41 @@ prof_dump_printf(bool propagate_err, const char *format, ...)
 	return (ret);
 }
 
+static prof_thr_cnt_t *
+ctx_sum_iter(prof_thr_cnt_tree_t *thr_cnts, prof_thr_cnt_t *thr_cnt, void *arg)
+{
+	prof_ctx_t *ctx = (prof_ctx_t *)arg;
+	volatile unsigned *epoch = &thr_cnt->epoch;
+	prof_cnt_t tcnt;
+
+	while (true) {
+		unsigned epoch0 = *epoch;
+
+		/* Make sure epoch is even. */
+		if (epoch0 & 1U)
+			continue;
+
+		memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
+
+		/* Terminate if epoch didn't change while reading. */
+		if (*epoch == epoch0)
+			break;
+	}
+
+	ctx->cnt_summed.curobjs += tcnt.curobjs;
+	ctx->cnt_summed.curbytes += tcnt.curbytes;
+	if (opt_prof_accum) {
+		ctx->cnt_summed.accumobjs += tcnt.accumobjs;
+		ctx->cnt_summed.accumbytes += tcnt.accumbytes;
+	}
+
+	return (NULL);
+}
+
 static void
 prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx,
-    prof_ctx_list_t *ctx_ql)
+    prof_ctx_tree_t *ctxs)
 {
-	prof_thr_cnt_t *thr_cnt;
-	prof_cnt_t tcnt;
 
 	cassert(config_prof);
 
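ctx_sum_iter() snapshots each thread's counters without taking a lock, using epoch as a seqlock: the writer makes epoch odd for the duration of an update, so a reader retries until it observes the same even epoch before and after its copy. A simplified sketch of both halves of the protocol, with a hypothetical counters_t standing in for prof_thr_cnt_t (and omitting the memory barriers production code pairs with this):

#include <string.h>

typedef struct {
	volatile unsigned epoch;	/* Even iff counters are quiescent. */
	long curobjs;
	long curbytes;
} counters_t;

/* Writer: keep epoch odd while the counters are inconsistent. */
static void
counters_update(counters_t *c, long dobjs, long dbytes)
{

	c->epoch++;		/* Odd: readers that peek now will retry. */
	c->curobjs += dobjs;
	c->curbytes += dbytes;
	c->epoch++;		/* Even again: snapshot is consistent. */
}

/* Reader: the same retry loop as ctx_sum_iter() above. */
static counters_t
counters_snapshot(const counters_t *c)
{
	counters_t snap;

	while (1) {
		unsigned epoch0 = c->epoch;

		/* Make sure epoch is even. */
		if (epoch0 & 1U)
			continue;

		memcpy(&snap, c, sizeof(snap));

		/* Terminate if epoch didn't change while reading. */
		if (c->epoch == epoch0)
			break;
	}
	return (snap);
}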
@@ -762,33 +816,10 @@ prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx,
 	 * prof_dump()'s second pass.
	 */
 	ctx->nlimbo++;
-	ql_tail_insert(ctx_ql, ctx, dump_link);
+	ctx_tree_insert(ctxs, ctx);
 
 	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
-	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
-		volatile unsigned *epoch = &thr_cnt->epoch;
-
-		while (true) {
-			unsigned epoch0 = *epoch;
-
-			/* Make sure epoch is even. */
-			if (epoch0 & 1U)
-				continue;
-
-			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
-
-			/* Terminate if epoch didn't change while reading. */
-			if (*epoch == epoch0)
-				break;
-		}
-
-		ctx->cnt_summed.curobjs += tcnt.curobjs;
-		ctx->cnt_summed.curbytes += tcnt.curbytes;
-		if (opt_prof_accum) {
-			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
-			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
-		}
-	}
+	thr_cnt_tree_iter(&ctx->thr_cnts, NULL, ctx_sum_iter, (void *)ctx);
 
 	if (ctx->cnt_summed.curobjs != 0)
 		(*leak_nctx)++;
@@ -829,25 +860,24 @@ prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
 }
 
 static void
-prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
+prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_tree_t *ctxs)
 {
 
 	ctx->nlimbo--;
-	ql_remove(ctx_ql, ctx, dump_link);
 }
 
 static void
-prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
+prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_tree_t *ctxs)
 {
 
 	malloc_mutex_lock(ctx->lock);
-	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
+	prof_dump_ctx_cleanup_locked(ctx, ctxs);
 	malloc_mutex_unlock(ctx->lock);
 }
 
 static bool
 prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt,
-    prof_ctx_list_t *ctx_ql)
+    prof_ctx_tree_t *ctxs)
 {
 	bool ret;
 	unsigned i;
@@ -895,7 +925,7 @@ prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt,
 
 	ret = false;
 label_return:
-	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
+	prof_dump_ctx_cleanup_locked(ctx, ctxs);
 	malloc_mutex_unlock(ctx->lock);
 	return (ret);
 }
@@ -966,6 +996,26 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx,
 	}
 }
 
+static prof_ctx_t *
+prof_ctx_dump_iter(prof_ctx_tree_t *ctxs, prof_ctx_t *ctx, void *arg)
+{
+	bool propagate_err = *(bool *)arg;
+
+	if (prof_dump_ctx(propagate_err, ctx, &ctx->bt, ctxs))
+		return (ctx_tree_next(ctxs, ctx));
+
+	return (NULL);
+}
+
+static prof_ctx_t *
+prof_ctx_cleanup_iter(prof_ctx_tree_t *ctxs, prof_ctx_t *ctx, void *arg)
+{
+
+	prof_dump_ctx_cleanup(ctx, ctxs);
+
+	return (NULL);
+}
+
 static bool
 prof_dump(bool propagate_err, const char *filename, bool leakcheck)
 {
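Both callbacks lean on the contract of the rb_gen()-generated iterator: it visits nodes in comparator order, starting from the node passed as the second argument (or from the first node when given NULL), and it aborts the walk as soon as the callback returns non-NULL, propagating that value back to the caller. Continuing the hypothetical node_t sketch from the header section (illustration only):

/*
 * Stop at the first node whose key reaches a threshold;
 * node_tree_iter() returns that node, or NULL if the walk completed.
 */
static node_t *
find_ge_iter(node_tree_t *tree, node_t *node, void *arg)
{
	int threshold = *(int *)arg;

	if (node->key >= threshold)
		return (node);	/* Abort; propagated to the caller. */
	return (NULL);		/* Continue in key order. */
}

This is exactly the shape of prof_ctx_dump_iter(): on a write error it returns the next context, so the caller both learns that the dump failed and gets the point at which cleanup must resume.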
@@ -977,7 +1027,8 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck)
 		void *v;
 	} ctx;
 	size_t leak_nctx;
-	prof_ctx_list_t ctx_ql;
+	prof_ctx_tree_t ctxs;
+	prof_ctx_t *cleanup_start = NULL;
 
 	cassert(config_prof);
 
@@ -990,10 +1041,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck)
 	/* Merge per thread profile stats, and sum them in cnt_all. */
 	memset(&cnt_all, 0, sizeof(prof_cnt_t));
 	leak_nctx = 0;
-	ql_new(&ctx_ql);
+	ctx_tree_new(&ctxs);
 	prof_enter(prof_tdata);
 	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
-		prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql);
+		prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctxs);
 	prof_leave(prof_tdata);
 
 	/* Create dump file. */
@@ -1005,10 +1056,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck)
 		goto label_write_error;
 
 	/* Dump per ctx profile stats. */
-	while ((ctx.p = ql_first(&ctx_ql)) != NULL) {
-		if (prof_dump_ctx(propagate_err, ctx.p, &ctx.p->bt, &ctx_ql))
-			goto label_write_error;
-	}
+	cleanup_start = ctx_tree_iter(&ctxs, NULL, prof_ctx_dump_iter,
+	    (void *)&propagate_err);
+	if (cleanup_start != NULL)
+		goto label_write_error;
 
 	/* Dump /proc/<pid>/maps if possible. */
 	if (prof_dump_maps(propagate_err))
@@ -1026,8 +1077,10 @@ prof_dump(bool propagate_err, const char *filename, bool leakcheck)
 label_write_error:
 	prof_dump_close(propagate_err);
 label_open_close_error:
-	while ((ctx.p = ql_first(&ctx_ql)) != NULL)
-		prof_dump_ctx_cleanup(ctx.p, &ctx_ql);
+	if (cleanup_start != NULL) {
+		ctx_tree_iter(&ctxs, cleanup_start, prof_ctx_cleanup_iter,
+		    NULL);
+	}
 	malloc_mutex_unlock(&prof_dump_mtx);
 	return (true);
 }
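Design note: a non-NULL return from ctx_tree_iter() thus doubles as the write-error signal and the cleanup resume point. Contexts dumped before the failure have already dropped their nlimbo reference inside prof_dump_ctx(), so the error path restarts the iteration at cleanup_start and releases only the remaining references, where the old code drained a list from the head.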
