Skip to content

Commit 44d2eeb

Browse files
committed
flamegraphs
1 parent 280550e commit 44d2eeb

File tree

2 files changed

+54
-47
lines changed

2 files changed

+54
-47
lines changed

src/parcel_snoop_inference.jl

Lines changed: 53 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1789,27 +1789,33 @@ stripifi(args...) = strip_prefix(args..., "InferenceFrameInfo for ")
17891789
## Flamegraph creation
17901790

17911791
"""
1792-
flamegraph(tinf::InferenceTimingNode; tmin=0.0, excluded_modules=Set([Main]), mode=nothing)
1792+
flamegraph(tinf::InferenceTimingNode; include_llvm=true, tmin=0.0, excluded_modules=Set([Main]), mode=nothing)
17931793
1794-
Convert the call tree of inference timings returned from `@snoop_inference` into a FlameGraph.
1795-
Returns a FlameGraphs.FlameGraph structure that represents the timing trace recorded for
1796-
type inference.
1794+
Convert the call tree of inference timings returned from `@snoop_inference` into
1795+
a FlameGraph. Returns a FlameGraphs.FlameGraph structure that represents the
1796+
timing trace recorded for type inference and, if `include_llvm` is `true`, LLVM
1797+
compilation.
17971798
17981799
Frames that take less than `tmin` seconds of inclusive time will not be included
1799-
in the resultant FlameGraph (meaning total time including it and all of its children).
1800-
This can be helpful if you have a very big profile, to save on processing time.
1800+
in the resultant FlameGraph (inclusive time is a frame's own time plus that of all its
1801+
children). This can be helpful if you have a very big profile, to save on
1802+
processing time.
18011803
1802-
Non-precompilable frames are marked in reddish colors. `excluded_modules` can be used to mark methods
1803-
defined in modules to which you cannot or do not wish to add precompiles.
1804+
Non-precompilable frames are marked in reddish colors. `excluded_modules` can be
1805+
used to mark methods defined in modules to which you cannot or do not wish to
1806+
add precompiles.
18041807
1805-
`mode` controls how frames are named in tools like ProfileView.
1806-
`nothing` uses the default of just the qualified function name, whereas
1807-
supplying `mode=Dict(method => count)` counting the number of specializations of
1808-
each method will cause the number of specializations to be included in the frame name.
1808+
`mode` controls how frames are named in tools like ProfileView. `nothing` uses
1809+
the default of just the qualified function name, whereas `mode=:spec` will print
1810+
the full specialization of each `MethodInstance`. Supplying a pre-populated
1811+
`mode=Dict(method => count)` will cause `count` to be included in the frame name
1812+
of the corresponding `method`. This can be used, for example, to display the
1813+
number of specializations of each method.
18091814
18101815
# Example
18111816
1812-
We'll use [`SnoopCompile.flatten_demo`](@ref), which runs `@snoop_inference` on a workload designed to yield reproducible results:
1817+
We'll use [`SnoopCompile.flatten_demo`](@ref), which runs `@snoop_inference` on
1818+
a workload designed to yield reproducible results:
18131819
18141820
```jldoctest flamegraph; setup=:(using SnoopCompile), filter=r"([0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?/[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?|at.*typeinfer\\.jl:\\d+|0:\\d+|WARNING: replacing module FlattenDemo\\.\\n)"
18151821
julia> tinf = SnoopCompile.flatten_demo()
@@ -1823,36 +1829,44 @@ Node(FlameGraphs.NodeData(ROOT() at typeinfer.jl:75, 0x00, 0:3334431))
18231829
julia> ProfileView.view(fg); # Display the FlameGraph in a package that supports it
18241830
```
18251831
1826-
You should be able to reconcile the resulting flamegraph to `print_tree(tinf)` (see [`flatten`](@ref)).
1832+
You should be able to reconcile the resulting flamegraph with `print_tree(tinf)`
1833+
(see [`flatten`](@ref)).
18271834
1828-
The empty horizontal periods in the flamegraph correspond to times when something other than inference is running.
1829-
The total width of the flamegraph is set from the `ROOT` node.
1835+
The empty horizontal periods in the flamegraph correspond to times when
1836+
something other than inference is running. The total width of the flamegraph is
1837+
set from the `ROOT` node.
18301838
"""
1831-
function FlameGraphs.flamegraph(tinf::InferenceTimingNode; tmin = 0.0, excluded_modules=Set([Main::Module]), mode=nothing)
1839+
function FlameGraphs.flamegraph(tinf::InferenceTimingNode; include_llvm::Bool=true, tmin = 0.0, excluded_modules=Set([Main::Module]), mode=nothing)
18321840
isROOT(tinf) && isempty(tinf.children) && @warn "Empty profile: no compilation was recorded."
1841+
duration(node) = inclusive(node) + include_llvm * reinterpret(Float16, node.ci.time_compile)
1842+
ctimes = pushfirst!(cumsum(duration.(tinf.children)), 0)
18331843
io = IOBuffer()
18341844
# Compute a "root" frame for the top-level node, to cover the whole profile
1835-
node_data, _ = _flamegraph_frame(io, tinf, tinf.start_time, true, excluded_modules, mode; toplevel=true)
1845+
node_data, _ = _flamegraph_frame(duration, io, tinf, 0, false, excluded_modules, mode; stop_secs=ctimes[end])
18361846
root = Node(node_data)
18371847
if !isROOT(tinf)
1838-
node_data, child_check_precompilable = _flamegraph_frame(io, tinf, tinf.start_time, true, excluded_modules, mode; toplevel=false)
1848+
node_data, child_check_precompilable = _flamegraph_frame(duration, io, tinf, 0, true, excluded_modules, mode)
18391849
root = addchild(root, node_data)
18401850
end
1841-
return _build_flamegraph!(root, io, tinf, tinf.start_time, tmin, true, excluded_modules, mode)
1851+
return _build_flamegraph!(duration, root, io, tinf, ctimes, tmin, true, excluded_modules, mode)
18421852
end
1843-
function _build_flamegraph!(root, io::IO, node::InferenceTimingNode, start_secs, tmin, check_precompilable, excluded_modules, mode)
1844-
for child in node.children
1845-
if inclusive(child) > tmin
1846-
node_data, child_check_precompilable = _flamegraph_frame(io, child, start_secs, check_precompilable, excluded_modules, mode; toplevel=false)
1853+
function _build_flamegraph!(duration, root, io::IO, node::InferenceTimingNode, ctimes, tmin, check_precompilable, excluded_modules, mode)
1854+
start_secs = first(ctimes)
1855+
for (i, child) in pairs(node.children)
1856+
if duration(child) > tmin
1857+
node_data, child_check_precompilable = _flamegraph_frame(duration, io, child, ctimes[i], check_precompilable, excluded_modules, mode)
18471858
node = addchild(root, node_data)
1848-
_build_flamegraph!(node, io, child, start_secs, tmin, child_check_precompilable, excluded_modules, mode)
1859+
if !isempty(child.children)
1860+
newtimes = pushfirst!(cumsum(duration.(child.children)), 0) .+ start_secs
1861+
_build_flamegraph!(duration, node, io, child, newtimes, tmin, child_check_precompilable, excluded_modules, mode)
1862+
end
18491863
end
18501864
end
18511865
return root
18521866
end
18531867

18541868
# Create a profile frame for this node
1855-
function _flamegraph_frame(io::IO, node::InferenceTimingNode, start_secs, check_precompilable::Bool, excluded_modules, mode; toplevel)
1869+
function _flamegraph_frame(duration, io::IO, node::InferenceTimingNode, start_secs, check_precompilable::Bool, excluded_modules, mode; stop_secs=nothing)
18561870
function func_name(mi::MethodInstance, ::Nothing)
18571871
m = mi.def
18581872
return isa(m, Method) ? string(m.module, '.', m.name) : string(m, '.', "thunk")
@@ -1868,20 +1882,17 @@ function _flamegraph_frame(io::IO, node::InferenceTimingNode, start_secs, check_
18681882
end
18691883
return str
18701884
end
1871-
# function func_name(io::IO, mi_info::InferenceFrameInfo, mode)
1872-
# if mode === :slots
1873-
# show(io, mi_info)
1874-
# str = String(take!(io))
1875-
# startswith(str, "InferenceFrameInfo for ") && (str = str[length("InferenceFrameInfo for ")+1:end])
1876-
# return str
1877-
# elseif mode === :spec
1878-
# return frame_name(io, mi_info)
1879-
# else
1880-
# return func_name(MethodInstance(mi_info), mode)
1881-
# end
1882-
# end
1885+
function func_name(io::IO, node::InferenceTimingNode, mode)
1886+
if mode === :slots
1887+
error("unsupported in Julia 1.12")
1888+
elseif mode === :spec
1889+
return frame_name(io, MethodInstance(node))
1890+
else
1891+
return func_name(MethodInstance(node), mode)
1892+
end
1893+
end
18831894

1884-
mistr = Symbol(func_name(io, MethodInstance(node), mode))
1895+
mistr = Symbol(func_name(io, node, mode))
18851896
mi = MethodInstance(node)
18861897
m = mi.def
18871898
sf = isa(m, Method) ? StackFrame(mistr, mi.def.file, mi.def.line, mi, false, false, UInt64(0x0)) :
@@ -1899,14 +1910,10 @@ function _flamegraph_frame(io::IO, node::InferenceTimingNode, start_secs, check_
18991910
if hasconstprop(InferenceTiming(node))
19001911
status |= FlameGraphs.gc_event
19011912
end
1902-
start = node.start_time - start_secs
1903-
if toplevel
1904-
# Compute a range over the whole profile for the top node.
1905-
stop_secs = isROOT(node) ? max_end_time(node) : max_end_time(node, true)
1906-
range = round(Int, start*1e9) : round(Int, (stop_secs - start_secs)*1e9)
1907-
else
1908-
range = round(Int, start*1e9) : round(Int, (start + inclusive(node))*1e9)
1913+
if stop_secs === nothing
1914+
stop_secs = duration(node) + start_secs
19091915
end
1916+
range = round(Int, start_secs*1e9) : round(Int, (stop_secs - start_secs)*1e9)
19101917
return FlameGraphs.NodeData(sf, status, range), check_precompilable
19111918
end
19121919

test/snoop_inference.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,7 @@ end
633633
fgnodes = collect(AbstractTrees.PreOrderDFS(fg))
634634
for tgtname in (:h, :i, :+)
635635
@test mapreduce(|, fgnodes; init=false) do node
636-
node.data.sf.linfo.def.name == tgtname
636+
SnoopCompile.methodinstance(node.data.sf.linfo).def.name == tgtname
637637
end
638638
end
639639
# Test that the span covers the whole tree, and check for const-prop

0 commit comments

Comments
 (0)