Skip to content

Commit 6f143ea

Browse files
authored
invert linetable representation (#52415)
Previously, our linetables (similar to LLVM) represented line information as a linked list from callee via inlined_at up to the original information. This requires many copies of this information to be created. Instead, we can take advantage of the necessary existence of the line table from the child to flip this chain of information and make each statement an entry in a table describing (for each IR instruction): `(current line number, (index into edges, index into edges statements))`, plus a table of all edges, plus a table with the original line numbers from the parser, plus the file name. This is all packed into the struct

```julia
struct DebugInfo
    def::Union{Method,MethodInstance,Symbol}
    linetable::Union{Nothing,DebugInfo}
    edges::SimpleVector{DebugInfo}
    codelocs::String
end
```

The meaning of each field is described in doc/src/devdocs/ast.md; see stacktraces.jl or compiler/ssair/show.jl for how to decode and interpret this information. For the sysimage, this saves several megabytes (about 113 MB -> 110 MB) and about 5% of the stdlib pkgimages (294 MB -> 279 MB). It also now happens to have the full type information for the inlined functions. Now if you create an `IRShow.DILineInfoPrinter` with `showtypes=true`, it can print that information when printing IR.
```
julia> @eval Base.IRShow DILineInfoPrinter(debuginfo, def) = DILineInfoPrinter(debuginfo, def, true)
DILineInfoPrinter (generic function with 2 methods)

julia> (@code_typed 1 + 1.0)[1]
CodeInfo(
    @ promotion.jl:425 within `+`
   ┌ invoke MethodInstance for promote(::Int64, ::Float64)
   │ @ promotion.jl:396 within `promote`
   │┌ invoke MethodInstance for Base._promote(::Int64, ::Float64)
   ││ @ promotion.jl:373 within `_promote`
   ││┌ invoke MethodInstance for convert(::Type{Float64}, ::Int64)
   │││ @ number.jl:7 within `convert`
   │││┌ invoke MethodInstance for Float64(::Int64)
   ││││ @ float.jl:221 within `Float64`
1 ─││││ %1 = Base.sitofp(Float64, x)::Float64
│  └└└└
│  ┌ invoke MethodInstance for +(::Float64, ::Float64)
│  │ @ float.jl:460 within `+`
│  │ %2 = Base.add_float(%1, y)::Float64
│  └
└──      return %2
)
```
1 parent 8413b97 commit 6f143ea

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+1488
-899
lines changed

base/boot.jl

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
# }
6565
#end
6666

67-
# struct GenericMemoryRef{kind::Symbol, T, AS::AddrSpace}
67+
#struct GenericMemoryRef{kind::Symbol, T, AS::AddrSpace}
6868
# mem::GenericMemory{kind, T, AS}
6969
# data::Ptr{Cvoid} # make this GenericPtr{addrspace, Cvoid}
7070
#end
@@ -125,12 +125,13 @@
125125
# file::Union{Symbol,Nothing}
126126
#end
127127

128-
#struct LineInfoNode
129-
# module::Module
130-
# method::Any (Union{Symbol, Method, MethodInstance})
131-
# file::Symbol
132-
# line::Int32
133-
# inlined_at::Int32
128+
#struct LegacyLineInfoNode end # only used internally during lowering
129+
130+
#struct DebugInfo
131+
# def::Any # (Union{Symbol, Method, MethodInstance})
132+
# linetable::Any # (Union{Nothing,DebugInfo})
133+
# edges::SimpleVector # Vector{DebugInfo}
134+
# codelocs::String # compressed Vector{UInt8}
134135
#end
135136

136137
#struct GotoNode
@@ -296,6 +297,9 @@ TypeVar(@nospecialize(n), @nospecialize(ub)) = _typevar(n::Symbol, Union{}, ub)
296297
TypeVar(@nospecialize(n), @nospecialize(lb), @nospecialize(ub)) = _typevar(n::Symbol, lb, ub)
297298
UnionAll(@nospecialize(v), @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v::TypeVar, t)
298299

300+
const Memory{T} = GenericMemory{:not_atomic, T, CPU}
301+
const MemoryRef{T} = GenericMemoryRef{:not_atomic, T, CPU}
302+
299303
# simple convert for use by constructors of types in Core
300304
# note that there is no actual conversion defined here,
301305
# so the methods and ccall's in Core aren't permitted to use convert
@@ -466,8 +470,10 @@ eval(Core, quote
466470
isa(f, String) && (f = Symbol(f))
467471
return $(Expr(:new, :LineNumberNode, :l, :f))
468472
end
469-
LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) =
470-
$(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at))
473+
DebugInfo(def::Union{Method,MethodInstance,Symbol}, linetable::Union{Nothing,DebugInfo}, edges::SimpleVector, codelocs::String) =
474+
$(Expr(:new, :DebugInfo, :def, :linetable, :edges, :codelocs))
475+
DebugInfo(def::Union{Method,MethodInstance,Symbol}) =
476+
$(Expr(:new, :DebugInfo, :def, nothing, Core.svec(), ""))
471477
SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n))
472478
PhiNode(edges::Array{Int32, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values))
473479
PiNode(@nospecialize(val), @nospecialize(typ)) = $(Expr(:new, :PiNode, :val, :typ))
@@ -482,16 +488,25 @@ eval(Core, quote
482488
MethodMatch(@nospecialize(spec_types), sparams::SimpleVector, method::Method, fully_covers::Bool) = $(Expr(:new, :MethodMatch, :spec_types, :sparams, :method, :fully_covers))
483489
end)
484490

491+
# Plain five-field record. `method` carries no type annotation (so it is `Any`),
# and the inner constructor takes it with `@nospecialize`.
# NOTE(review): the commented-out Core definition this replaces listed `method` as
# Union{Symbol, Method, MethodInstance}; that restriction is not enforced here.
struct LineInfoNode # legacy support for aiding Serializer.deserialize of old IR
492+
mod::Module
493+
method
494+
file::Symbol
495+
line::Int32
496+
# NOTE(review): presumably the index of the caller's entry in the old linked-list
# linetable (the commit message describes chaining "via inlined_at") — confirm.
inlined_at::Int32
497+
# The only constructor: forwards all five arguments to `new` unchanged.
LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) = new(mod, method, file, line, inlined_at)
498+
end
499+
500+
485501
function CodeInstance(
486502
mi::MethodInstance, owner, @nospecialize(rettype), @nospecialize(exctype), @nospecialize(inferred_const),
487503
@nospecialize(inferred), const_flags::Int32, min_world::UInt, max_world::UInt,
488504
ipo_effects::UInt32, effects::UInt32, @nospecialize(analysis_results),
489-
relocatability::UInt8)
505+
relocatability::UInt8, edges::DebugInfo)
490506
return ccall(:jl_new_codeinst, Ref{CodeInstance},
491-
(Any, Any, Any, Any, Any, Any, Int32, UInt, UInt, UInt32, UInt32, Any, UInt8),
507+
(Any, Any, Any, Any, Any, Any, Int32, UInt, UInt, UInt32, UInt32, Any, UInt8, Any),
492508
mi, owner, rettype, exctype, inferred_const, inferred, const_flags, min_world, max_world,
493-
ipo_effects, effects, analysis_results,
494-
relocatability)
509+
ipo_effects, effects, analysis_results, relocatability, edges)
495510
end
496511
GlobalRef(m::Module, s::Symbol) = ccall(:jl_module_globalref, Ref{GlobalRef}, (Any, Any), m, s)
497512
Module(name::Symbol=:anonymous, std_imports::Bool=true, default_names::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool, Bool), name, std_imports, default_names)
@@ -629,12 +644,12 @@ module IR
629644

630645
export CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,
631646
NewvarNode, SSAValue, SlotNumber, Argument,
632-
PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode,
647+
PiNode, PhiNode, PhiCNode, UpsilonNode, DebugInfo,
633648
Const, PartialStruct, InterConditional, EnterNode
634649

635650
using Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, ReturnNode,
636651
NewvarNode, SSAValue, SlotNumber, Argument,
637-
PiNode, PhiNode, PhiCNode, UpsilonNode, LineInfoNode,
652+
PiNode, PhiNode, PhiCNode, UpsilonNode, DebugInfo,
638653
Const, PartialStruct, InterConditional, EnterNode
639654

640655
end # module IR

base/compiler/inferencestate.jl

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ mutable struct InferenceState
323323
exc_bestguess = Bottom
324324
ipo_effects = EFFECTS_TOTAL
325325

326-
insert_coverage = should_insert_coverage(mod, src)
326+
insert_coverage = should_insert_coverage(mod, src.debuginfo)
327327
if insert_coverage
328328
ipo_effects = Effects(ipo_effects; effect_free = ALWAYS_FALSE)
329329
end
@@ -474,25 +474,21 @@ function compute_trycatch(code::Vector{Any}, ip::BitSet)
474474
end
475475

476476
# check if coverage mode is enabled
477-
function should_insert_coverage(mod::Module, src::CodeInfo)
477+
function should_insert_coverage(mod::Module, debuginfo::DebugInfo)
478478
coverage_enabled(mod) && return true
479479
JLOptions().code_coverage == 3 || return false
480480
# path-specific coverage mode: if any line falls in a tracked file enable coverage for all
481-
linetable = src.linetable
482-
if isa(linetable, Vector{Any})
483-
for line in linetable
484-
line = line::LineInfoNode
485-
if is_file_tracked(line.file)
486-
return true
487-
end
488-
end
489-
elseif isa(linetable, Vector{LineInfoNode})
490-
for line in linetable
491-
if is_file_tracked(line.file)
492-
return true
493-
end
494-
end
495-
end
481+
return _should_insert_coverage(debuginfo)
482+
end
483+
484+
_should_insert_coverage(mod::Symbol) = is_file_tracked(mod)
485+
_should_insert_coverage(mod::Method) = _should_insert_coverage(mod.file)
486+
_should_insert_coverage(mod::MethodInstance) = _should_insert_coverage(mod.def)
487+
_should_insert_coverage(mod::Module) = false
488+
function _should_insert_coverage(info::DebugInfo)
489+
linetable = info.linetable
490+
linetable === nothing || (_should_insert_coverage(linetable) && return true)
491+
_should_insert_coverage(info.def) && return true
496492
return false
497493
end
498494

base/compiler/optimize.jl

Lines changed: 73 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -990,22 +990,79 @@ function run_passes_ipo_safe(
990990
if is_asserts()
991991
@timeit "verify 3" begin
992992
verify_ir(ir, true, false, optimizer_lattice(sv.inlining.interp))
993-
verify_linetable(ir.linetable)
993+
verify_linetable(ir.debuginfo, length(ir.stmts))
994994
end
995995
end
996996
@label __done__ # used by @pass
997997
return ir
998998
end
999999

1000-
function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
1001-
linetable = ci.linetable
1002-
if !isa(linetable, Vector{LineInfoNode})
1003-
linetable = collect(LineInfoNode, linetable::Vector{Any})::Vector{LineInfoNode}
1000+
# Normalize the tail of a statement list in place so that it ends in a terminator.
#
# Step 1: find the last statement that is not `nothing` and truncate `code` and the
#         parallel arrays (`ssavaluetypes`, `debuginfo.codelocs`, `info`, `ssaflags`)
#         to that position.
# Step 2: if the final statement is not a `GotoIfNot`, `GotoNode`, or `ReturnNode`,
#         append an argument-less `ReturnNode()` with matching entries in every
#         parallel array, and grow the last basic block of `cfg` by one statement.
#
# All arguments are mutated; the function returns `nothing`.
# NOTE(review): `code[end]` throws on an empty `code`; callers are presumably
# expected to pass at least one statement — confirm.
function strip_trailing_junk!(code::Vector{Any}, ssavaluetypes::Vector{Any}, ssaflags::Vector, debuginfo::DebugInfoStream, cfg::CFG, info::Vector{CallInfo})
1001+
# Remove `nothing`s at the end, we don't handle them well
1002+
# (we expect the last instruction to be a terminator)
1003+
codelocs = debuginfo.codelocs
1004+
# Scan backwards; the first non-`nothing` statement found is the new last statement.
for i = length(code):-1:1
1005+
if code[i] !== nothing
1006+
resize!(code, i)
1007+
resize!(ssavaluetypes, i)
1008+
# `codelocs` is kept at three entries per statement (hence `3i` here and the
# three zeros pushed below).
resize!(codelocs, 3i)
1009+
resize!(info, i)
1010+
resize!(ssaflags, i)
1011+
break
1012+
end
1013+
end
1014+
# If the last instruction is not a terminator, add one. This can
1015+
# happen for implicit return on dead branches.
1016+
term = code[end]
1017+
if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
1018+
push!(code, ReturnNode())
1019+
push!(ssavaluetypes, Union{})
1020+
# all-zero triple for the appended statement
# NOTE(review): presumably zero means "no location info" — confirm.
push!(codelocs, 0, 0, 0)
1021+
push!(info, NoCallInfo())
1022+
push!(ssaflags, IR_FLAG_NOTHROW)
1023+
1024+
# Update CFG to include appended terminator
1025+
old_range = cfg.blocks[end].stmts
1026+
new_range = StmtRange(first(old_range), last(old_range) + 1)
1027+
cfg.blocks[end] = BasicBlock(cfg.blocks[end], new_range)
1028+
# Only bump the final `cfg.index` entry when `index` has one entry per block.
# NOTE(review): `index` presumably holds block boundary positions — confirm.
(length(cfg.index) == length(cfg.blocks)) && (cfg.index[end] += 1)
1029+
end
1030+
nothing
1031+
end
1032+
1033+
# Decide whether the debug entry at statement index `codeloc` differs from the one
# at `prevloc` within `di`. Returns `true` when line information changed and `false`
# otherwise; `convert_to_ircode` uses the result to decide where to insert a
# `:code_coverage_effect` statement.
#
# Each loop iteration compares one level; when both entries share the same non-zero
# edge, the loop continues inside that edge's `DebugInfo` with the third components
# of the two entries as the new indices.
# NOTE(review): `getdebugidx` is assumed to return a 3-element tuple
# (line, edge index, index within edge), matching the commit description — confirm.
function changed_lineinfo(di::DebugInfo, codeloc::Int, prevloc::Int)
1034+
while true
1035+
next = getdebugidx(di, codeloc)
1036+
next[1] < 0 && return false # invalid info
1037+
next[1] == 0 && next[2] == 0 && return false # no new info
1038+
prevloc <= 0 && return true # no old info
1039+
prev = getdebugidx(di, prevloc)
1040+
next === prev && return false # exactly identical
1041+
prev[1] < 0 && return true # previous invalid info, now valid
1042+
edge = next[2]
1043+
edge === prev[2] || return true # change to this edge
1044+
linetable = di.linetable
1045+
# check for change to line number here
1046+
# Without a nested linetable (or with a zero first component) the first components
# are compared directly; otherwise they are treated as indices into `linetable`
# and compared recursively.
if linetable === nothing || next[1] == 0
1047+
next[1] == prev[1] || return true
1048+
else
1049+
changed_lineinfo(linetable, next[1], prev[1]) && return true
1050+
end
1051+
# check for change to edge here
1052+
edge == 0 && return false # no edge here
1053+
# Same edge on both sides: descend into it and compare the positions inside it.
di = di.edges[Int(edge)]::DebugInfo
1054+
codeloc = Int(next[3])
1055+
prevloc = Int(prev[3])
10041056
end
1057+
end
10051058

1059+
function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
10061060
# Update control-flow to reflect any unreachable branches.
10071061
ssavaluetypes = ci.ssavaluetypes::Vector{Any}
1008-
code = copy_exprargs(ci.code)
1062+
ci.code = code = copy_exprargs(ci.code)
1063+
di = DebugInfoStream(sv.linfo, ci.debuginfo, length(code))
1064+
codelocs = di.codelocs
1065+
ssaflags = ci.ssaflags
10091066
for i = 1:length(code)
10101067
expr = code[i]
10111068
if !(i in sv.unreachable)
@@ -1021,11 +1078,11 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
10211078
((block + 1) != destblock) && cfg_delete_edge!(sv.cfg, block, destblock)
10221079
expr = Expr(:call, Core.typeassert, expr.cond, Bool)
10231080
elseif i + 1 in sv.unreachable
1024-
@assert has_flag(ci.ssaflags[i], IR_FLAG_NOTHROW)
1081+
@assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
10251082
cfg_delete_edge!(sv.cfg, block, block + 1)
10261083
expr = GotoNode(expr.dest)
10271084
elseif expr.dest in sv.unreachable
1028-
@assert has_flag(ci.ssaflags[i], IR_FLAG_NOTHROW)
1085+
@assert has_flag(ssaflags[i], IR_FLAG_NOTHROW)
10291086
cfg_delete_edge!(sv.cfg, block, block_for_inst(sv.cfg, expr.dest))
10301087
expr = nothing
10311088
end
@@ -1052,20 +1109,17 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
10521109
# Go through and add an unreachable node after every
10531110
# Union{} call. Then reindex labels.
10541111
stmtinfo = sv.stmt_info
1055-
codelocs = ci.codelocs
1056-
ssaflags = ci.ssaflags
10571112
meta = Expr[]
10581113
idx = 1
10591114
oldidx = 1
10601115
nstmts = length(code)
10611116
ssachangemap = labelchangemap = blockchangemap = nothing
1062-
prevloc = zero(eltype(ci.codelocs))
1117+
prevloc = 0
10631118
while idx <= length(code)
1064-
codeloc = codelocs[idx]
1065-
if sv.insert_coverage && codeloc != prevloc && codeloc != 0
1119+
if sv.insert_coverage && changed_lineinfo(ci.debuginfo, oldidx, prevloc)
10661120
# insert a side-effect instruction before the current instruction in the same basic block
10671121
insert!(code, idx, Expr(:code_coverage_effect))
1068-
insert!(codelocs, idx, codeloc)
1122+
splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
10691123
insert!(ssavaluetypes, idx, Nothing)
10701124
insert!(stmtinfo, idx, NoCallInfo())
10711125
insert!(ssaflags, idx, IR_FLAG_NULL)
@@ -1084,7 +1138,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
10841138
end
10851139
blockchangemap[block_for_inst(sv.cfg, oldidx)] += 1
10861140
idx += 1
1087-
prevloc = codeloc
1141+
prevloc = oldidx
10881142
end
10891143
if ssavaluetypes[idx] === Union{} && !(oldidx in sv.unreachable) && !isa(code[idx], PhiNode)
10901144
# We should have converted any must-throw terminators to an equivalent w/o control-flow edges
@@ -1106,7 +1160,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
11061160
# terminator with an explicit `unreachable` marker.
11071161
if block_end > idx
11081162
code[block_end] = ReturnNode()
1109-
codelocs[block_end] = codelocs[idx]
1163+
codelocs[3block_end-2], codelocs[3block_end-1], codelocs[3block_end-0] = (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0])
11101164
ssavaluetypes[block_end] = Union{}
11111165
stmtinfo[block_end] = NoCallInfo()
11121166
ssaflags[block_end] = IR_FLAG_NOTHROW
@@ -1121,7 +1175,7 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
11211175
idx += block_end - idx
11221176
else
11231177
insert!(code, idx + 1, ReturnNode())
1124-
insert!(codelocs, idx + 1, codelocs[idx])
1178+
splice!(codelocs, 3idx-2:3idx-3, (codelocs[3idx-2], codelocs[3idx-1], codelocs[3idx-0]))
11251179
insert!(ssavaluetypes, idx + 1, Union{})
11261180
insert!(stmtinfo, idx + 1, NoCallInfo())
11271181
insert!(ssaflags, idx + 1, IR_FLAG_NOTHROW)
@@ -1158,14 +1212,14 @@ function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
11581212
for i = 1:length(code)
11591213
code[i] = process_meta!(meta, code[i])
11601214
end
1161-
strip_trailing_junk!(ci, sv.cfg, code, stmtinfo)
1215+
strip_trailing_junk!(code, ssavaluetypes, ssaflags, di, sv.cfg, stmtinfo)
11621216
types = Any[]
11631217
stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
11641218
# NOTE this `argtypes` contains types of slots yet: it will be modified to contain the
11651219
# types of call arguments only once `slot2reg` converts this `IRCode` to the SSA form
11661220
# and eliminates slots (see below)
11671221
argtypes = sv.slottypes
1168-
return IRCode(stmts, sv.cfg, linetable, argtypes, meta, sv.sptypes)
1222+
return IRCode(stmts, sv.cfg, di, argtypes, meta, sv.sptypes)
11691223
end
11701224

11711225
function process_meta!(meta::Vector{Expr}, @nospecialize stmt)

0 commit comments

Comments
 (0)