Skip to content

Commit 140ea94

Browse files
authored
[Profile] add builtin Allocs.print and formatting equivalents to Profile.print (#51981)
Defines a converter for Allocs to the types implemented by Profile for printing, allowing flamegraph report generation of allocations with just the stdlib tooling. Refs: #42768 For old versions of Julia, you could previously get download a copy of this code that could be included as a polyfill to add this: https://gist.github.com/vtjnash/c6aa4db9dafccb0fd28a65f87d6b1adb
1 parent e7345b8 commit 140ea94

File tree

5 files changed

+257
-27
lines changed

5 files changed

+257
-27
lines changed

stdlib/Profile/docs/src/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ The methods in `Profile.Allocs` are not exported and need to be called e.g. as `
106106

107107
```@docs
108108
Profile.Allocs.clear
109+
Profile.Allocs.print
109110
Profile.Allocs.fetch
110111
Profile.Allocs.start
111112
Profile.Allocs.stop

stdlib/Profile/src/Allocs.jl

Lines changed: 205 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
module Allocs
22

3+
global print # Allocs.print is separate from both Base.print and Profile.print
4+
public @profile,
5+
clear,
6+
print,
7+
fetch
8+
9+
using ..Profile: Profile, ProfileFormat, StackFrameTree, print_flat, print_tree
310
using Base.StackTraces: StackTrace, StackFrame, lookup
411
using Base: InterpreterIP
512

@@ -138,7 +145,7 @@ end
138145
# Without this, the Alloc's stacktrace prints for lines and lines and lines...
139146
function Base.show(io::IO, a::Alloc)
140147
stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : ""
141-
print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
148+
Base.print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
142149
end
143150

144151
const BacktraceCache = Dict{BTElement,Vector{StackFrame}}
@@ -216,4 +223,201 @@ function stacktrace_memoized(
216223
return stack
217224
end
218225

226+
function warning_empty()
227+
@warn """
228+
There were no samples collected.
229+
Run your program longer (perhaps by running it multiple times),
230+
or adjust the frequency of samples to record every event with
231+
the `sample_rate=1.0` kwarg."""
232+
end
233+
234+
235+
"""
236+
Profile.Allocs.print([io::IO = stdout,] [data::AllocResults = fetch()]; kwargs...)
237+
238+
Prints profiling results to `io` (by default, `stdout`). If you do not
239+
supply a `data` vector, the internal buffer of accumulated backtraces
240+
will be used.
241+
242+
See `Profile.print` for an explanation of the valid keyword arguments.
243+
"""
244+
print(; kwargs...) =
245+
Profile.print(stdout, fetch(); kwargs...)
246+
print(io::IO; kwargs...) =
247+
Profile.print(io, fetch(); kwargs...)
248+
print(io::IO, data::AllocResults; kwargs...) =
249+
Profile.print(io, data; kwargs...)
250+
Profile.print(data::AllocResults; kwargs...) =
251+
Profile.print(stdout, data; kwargs...)
252+
253+
function Profile.print(io::IO,
254+
data::AllocResults,
255+
;
256+
format = :tree,
257+
C = false,
258+
#combine = true,
259+
maxdepth::Int = typemax(Int),
260+
mincount::Int = 0,
261+
noisefloor = 0,
262+
sortedby::Symbol = :filefuncline,
263+
groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
264+
recur::Symbol = :off,
265+
)
266+
pf = ProfileFormat(;C, maxdepth, mincount, noisefloor, sortedby, recur)
267+
Profile.print(io, data, pf, format)
268+
return
269+
end
270+
271+
function Profile.print(io::IO, data::AllocResults, fmt::ProfileFormat, format::Symbol)
272+
cols::Int = Base.displaysize(io)[2]
273+
fmt.recur (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
274+
data = data.allocs
275+
if format === :tree
276+
tree(io, data, cols, fmt)
277+
elseif format === :flat
278+
fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
279+
flat(io, data, cols, fmt)
280+
else
281+
throw(ArgumentError("output format $(repr(format)) not recognized"))
282+
end
283+
nothing
284+
end
285+
286+
287+
function parse_flat(::Type{T}, data::Vector{Alloc}, C::Bool) where T
288+
lilist = StackFrame[]
289+
n = Int[]
290+
m = Int[]
291+
lilist_idx = Dict{T, Int}()
292+
recursive = Set{T}()
293+
totalbytes = 0
294+
for r in data
295+
first = true
296+
empty!(recursive)
297+
nb = r.size # or 1 for counting
298+
totalbytes += nb
299+
for frame in r.stacktrace
300+
!C && frame.from_c && continue
301+
key = (T === UInt64 ? ip : frame)
302+
idx = get!(lilist_idx, key, length(lilist) + 1)
303+
if idx > length(lilist)
304+
push!(recursive, key)
305+
push!(lilist, frame)
306+
push!(n, nb)
307+
push!(m, 0)
308+
elseif !(key in recursive)
309+
push!(recursive, key)
310+
n[idx] += nb
311+
end
312+
if first
313+
m[idx] += nb
314+
first = false
315+
end
316+
end
317+
end
318+
@assert length(lilist) == length(n) == length(m) == length(lilist_idx)
319+
return (lilist, n, m, totalbytes)
320+
end
321+
322+
function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
323+
fmt.combine || error(ArgumentError("combine=false"))
324+
lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C)
325+
filenamemap = Dict{Symbol,String}()
326+
if isempty(lilist)
327+
warning_empty()
328+
return true
329+
end
330+
print_flat(io, lilist, n, m, cols, filenamemap, fmt)
331+
Base.println(io, "Total snapshots: ", length(data))
332+
Base.println(io, "Total bytes: ", totalbytes)
333+
return false
334+
end
335+
336+
function tree!(root::StackFrameTree{T}, all::Vector{Alloc}, C::Bool, recur::Symbol) where {T}
337+
tops = Vector{StackFrameTree{T}}()
338+
build = Dict{T, StackFrameTree{T}}()
339+
for r in all
340+
first = true
341+
nb = r.size # or 1 for counting
342+
root.recur = 0
343+
root.count += nb
344+
parent = root
345+
for i in reverse(eachindex(r.stacktrace))
346+
frame = r.stacktrace[i]
347+
key = (T === UInt64 ? ip : frame)
348+
if (recur === :flat && !frame.from_c) || recur === :flatc
349+
# see if this frame already has a parent
350+
this = get!(build, frame, parent)
351+
if this !== parent
352+
# Rewind the `parent` tree back, if this exact ip (FIXME) was already present *higher* in the current tree
353+
push!(tops, parent)
354+
parent = this
355+
end
356+
end
357+
!C && frame.from_c && continue
358+
this = get!(StackFrameTree{T}, parent.down, key)
359+
if recur === :off || this.recur == 0
360+
this.frame = frame
361+
this.up = parent
362+
this.count += nb
363+
this.recur = 1
364+
else
365+
this.count_recur += 1
366+
end
367+
parent = this
368+
end
369+
parent.overhead += nb
370+
if recur !== :off
371+
# We mark all visited nodes to so we'll only count those branches
372+
# once for each backtrace. Reset that now for the next backtrace.
373+
empty!(build)
374+
push!(tops, parent)
375+
for top in tops
376+
while top.recur != 0
377+
top.max_recur < top.recur && (top.max_recur = top.recur)
378+
top.recur = 0
379+
top = top.up
380+
end
381+
end
382+
empty!(tops)
383+
end
384+
let this = parent
385+
while this !== root
386+
this.flat_count += nb
387+
this = this.up
388+
end
389+
end
390+
end
391+
function cleanup!(node::StackFrameTree)
392+
stack = [node]
393+
while !isempty(stack)
394+
node = pop!(stack)
395+
node.recur = 0
396+
empty!(node.builder_key)
397+
empty!(node.builder_value)
398+
append!(stack, values(node.down))
399+
end
400+
nothing
401+
end
402+
cleanup!(root)
403+
return root
404+
end
405+
406+
function tree(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
407+
fmt.combine || error(ArgumentError("combine=false"))
408+
if fmt.combine
409+
root = tree!(StackFrameTree{StackFrame}(), data, fmt.C, fmt.recur)
410+
else
411+
root = tree!(StackFrameTree{UInt64}(), data, fmt.C, fmt.recur)
412+
end
413+
print_tree(io, root, cols, fmt, false)
414+
if isempty(root.down)
415+
warning_empty()
416+
return true
417+
end
418+
Base.println(io, "Total snapshots: ", length(data))
419+
Base.println(io, "Total bytes: ", root.count)
420+
return false
421+
end
422+
219423
end

stdlib/Profile/src/Profile.jl

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,20 @@ Profiling support, main entry point is the [`@profile`](@ref) macro.
55
"""
66
module Profile
77

8+
global print
9+
public @profile,
10+
clear,
11+
print,
12+
fetch,
13+
retrieve,
14+
add_fake_meta,
15+
flatten,
16+
callers,
17+
init,
18+
take_heap_snapshot,
19+
clear_malloc_data,
20+
Allocs
21+
822
import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
923

1024
const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
@@ -220,7 +234,7 @@ function print(io::IO,
220234

221235
pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
222236
if groupby === :none
223-
print(io, data, lidict, pf, format, threads, tasks, false)
237+
print_group(io, data, lidict, pf, format, threads, tasks, false)
224238
else
225239
if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
226240
error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
@@ -244,7 +258,7 @@ function print(io::IO,
244258
printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
245259
for threadid in threadids
246260
printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
247-
nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
261+
nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
248262
nosamples && (any_nosamples = true)
249263
println(io)
250264
end
@@ -262,7 +276,7 @@ function print(io::IO,
262276
printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
263277
for taskid in taskids
264278
printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
265-
nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
279+
nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
266280
nosamples && (any_nosamples = true)
267281
println(io)
268282
end
@@ -274,7 +288,7 @@ function print(io::IO,
274288
isempty(taskids) && (any_nosamples = true)
275289
for taskid in taskids
276290
printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
277-
nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
291+
nosamples = print_group(io, data, lidict, pf, format, threads, taskid, true)
278292
nosamples && (any_nosamples = true)
279293
println(io)
280294
end
@@ -284,7 +298,7 @@ function print(io::IO,
284298
isempty(threadids) && (any_nosamples = true)
285299
for threadid in threadids
286300
printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
287-
nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
301+
nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true)
288302
nosamples && (any_nosamples = true)
289303
println(io)
290304
end
@@ -306,7 +320,7 @@ See `Profile.print([io], data)` for an explanation of the valid keyword argument
306320
print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
307321
print(stdout, data, lidict; kwargs...)
308322

309-
function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
323+
function print_group(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
310324
format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
311325
is_subsection::Bool = false)
312326
cols::Int = Base.displaysize(io)[2]

stdlib/Profile/test/allocs.jl

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
using Test
22
using Profile: Allocs
33

4+
Allocs.clear()
5+
let iobuf = IOBuffer()
6+
for format in (:tree, :flat)
7+
Test.@test_logs (:warn, r"^There were no samples collected\.") Allocs.print(iobuf; format, C=true)
8+
end
9+
end
10+
411
@testset "alloc profiler doesn't segfault" begin
512
res = Allocs.@profile sample_rate=1.0 begin
613
# test the allocations during compilation
@@ -13,6 +20,20 @@ using Profile: Allocs
1320
@test first_alloc.size > 0
1421
@test length(first_alloc.stacktrace) > 0
1522
@test length(string(first_alloc.type)) > 0
23+
24+
# test printing options
25+
for options in ((format=:tree, C=true),
26+
(format=:tree, maxdepth=2),
27+
(format=:flat, C=true),
28+
(),
29+
(format=:flat, sortedby=:count),
30+
(format=:tree, recur=:flat),
31+
)
32+
iobuf = IOBuffer()
33+
Allocs.print(iobuf; options...)
34+
str = String(take!(iobuf))
35+
@test !isempty(str)
36+
end
1637
end
1738

1839
@testset "alloc profiler works when there are multiple tasks on multiple threads" begin

stdlib/Profile/test/runtests.jl

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -38,28 +38,18 @@ let r = Profile.retrieve()
3838
end
3939
end
4040

41-
let iobuf = IOBuffer()
42-
Profile.print(iobuf, format=:tree, C=true)
43-
str = String(take!(iobuf))
44-
@test !isempty(str)
45-
truncate(iobuf, 0)
46-
Profile.print(iobuf, format=:tree, maxdepth=2)
47-
str = String(take!(iobuf))
48-
@test !isempty(str)
49-
truncate(iobuf, 0)
50-
Profile.print(iobuf, format=:flat, C=true)
51-
str = String(take!(iobuf))
52-
@test !isempty(str)
53-
truncate(iobuf, 0)
54-
Profile.print(iobuf)
55-
@test !isempty(String(take!(iobuf)))
56-
truncate(iobuf, 0)
57-
Profile.print(iobuf, format=:flat, sortedby=:count)
58-
@test !isempty(String(take!(iobuf)))
59-
Profile.print(iobuf, format=:tree, recur=:flat)
41+
# test printing options
42+
for options in ((format=:tree, C=true),
43+
(format=:tree, maxdepth=2),
44+
(format=:flat, C=true),
45+
(),
46+
(format=:flat, sortedby=:count),
47+
(format=:tree, recur=:flat),
48+
)
49+
iobuf = IOBuffer()
50+
Profile.print(iobuf; options...)
6051
str = String(take!(iobuf))
6152
@test !isempty(str)
62-
truncate(iobuf, 0)
6353
end
6454

6555
@testset "Profile.print() groupby options" begin

0 commit comments

Comments
 (0)