Skip to content

Commit 85de0bf

Browse files
topolaritygbaraldi
andcommitted
TOML: Improve type-stability
This changes the output of the TOML parser to provide specialize `Vector{T}` less aggressively, so that combinatorially expensive types like `Vector{Vector{Float64}}` or `Vector{Union{Float64,Int64}}` are instead returned as `Vector{Any}` Vectors of homogeneous leaf types, like `Vector{Float64}` are still supported as before. This change makes the TOML parser fully type-stable, except for its dynamic usage of Dates. Co-authored-by: Gabriel Baraldi <[email protected]>
1 parent 4dfce5d commit 85de0bf

File tree

3 files changed

+52
-47
lines changed

3 files changed

+52
-47
lines changed

base/loading.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,6 @@ struct TOMLCache{Dates}
269269
d::Dict{String, CachedTOMLDict}
270270
end
271271
TOMLCache(p::TOML.Parser) = TOMLCache(p, Dict{String, CachedTOMLDict}())
272-
# TODO: Delete this converting constructor once Pkg stops using it
273272
TOMLCache(p::TOML.Parser, d::Dict{String, Dict{String, Any}}) = TOMLCache(p, convert(Dict{String, CachedTOMLDict}, d))
274273

275274
const TOML_CACHE = TOMLCache(TOML.Parser{nothing}())

base/toml_parser.jl

Lines changed: 51 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,6 @@ mutable struct Parser{Dates}
8484

8585
# Filled in in case we are parsing a file to improve error messages
8686
filepath::Union{String, Nothing}
87-
88-
# Optionally populate with the Dates stdlib to change the type of Date types returned
89-
Dates::Union{Module, Nothing} # TODO: remove once Pkg is updated
9087
end
9188

9289
function Parser{Dates}(str::String; filepath=nothing) where {Dates}
@@ -106,8 +103,7 @@ function Parser{Dates}(str::String; filepath=nothing) where {Dates}
106103
IdSet{Any}(), # static_arrays
107104
IdSet{TOMLDict}(), # defined_tables
108105
root,
109-
filepath,
110-
nothing
106+
filepath
111107
)
112108
startup(l)
113109
return l
@@ -495,8 +491,10 @@ function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String},
495491
d = d::TOMLDict
496492
key = dotted_keys[i]
497493
d = get!(TOMLDict, d, key)
498-
if d isa Vector
494+
if d isa Vector{Any}
499495
d = d[end]
496+
elseif d isa Vector
497+
return ParserError(ErrKeyAlreadyHasValue)
500498
end
501499
check && @try check_allowed_add_key(l, d, i == length(dotted_keys))
502500
end
@@ -668,41 +666,20 @@ end
668666
# Array #
669667
#########
670668

671-
function push!!(v::Vector, el)
672-
# Since these types are typically non-inferable, they are a big invalidation risk,
673-
# and since it's used by the package-loading infrastructure the cost of invalidation
674-
# is high. Therefore, this is written to reduce the "exposed surface area": e.g., rather
675-
# than writing `T[el]` we write it as `push!(Vector{T}(undef, 1), el)` so that there
676-
# is no ambiguity about what types of objects will be created.
677-
T = eltype(v)
678-
t = typeof(el)
679-
if el isa T || t === T
680-
push!(v, el::T)
681-
return v
682-
elseif T === Union{}
683-
out = Vector{t}(undef, 1)
684-
out[1] = el
685-
return out
686-
else
687-
if T isa Union
688-
newT = Any
689-
else
690-
newT = Union{T, typeof(el)}
691-
end
692-
new = Array{newT}(undef, length(v))
693-
copy!(new, v)
694-
return push!(new, el)
669+
function copyto_typed!(a::Vector{T}, b::Vector) where T
670+
for i in 1:length(b)
671+
a[i] = b[i]::T
695672
end
673+
return nothing
696674
end
697675

698-
function parse_array(l::Parser)::Err{Vector}
676+
function parse_array(l::Parser{Dates})::Err{Vector} where Dates
699677
skip_ws_nl(l)
700-
array = Vector{Union{}}()
678+
array = Vector{Any}()
701679
empty_array = accept(l, ']')
702680
while !empty_array
703681
v = @try parse_value(l)
704-
# TODO: Worth to function barrier this?
705-
array = push!!(array, v)
682+
array = push!(array, v)
706683
# There can be an arbitrary number of newlines and comments before a value and before the closing bracket.
707684
skip_ws_nl(l)
708685
comma = accept(l, ',')
@@ -712,8 +689,40 @@ function parse_array(l::Parser)::Err{Vector}
712689
return ParserError(ErrExpectedCommaBetweenItemsArray)
713690
end
714691
end
715-
push!(l.static_arrays, array)
716-
return array
692+
# check for static type throughout array
693+
T = !isempty(array) ? typeof(array[1]) : Union{}
694+
for el in array
695+
if typeof(el) != T
696+
T = Any
697+
break
698+
end
699+
end
700+
if T === Any
701+
new = array
702+
elseif T === String
703+
new = Array{T}(undef, length(array))
704+
copyto_typed!(new, array)
705+
elseif T === Bool
706+
new = Array{T}(undef, length(array))
707+
copyto_typed!(new, array)
708+
elseif T === Int64
709+
new = Array{T}(undef, length(array))
710+
copyto_typed!(new, array)
711+
elseif T === UInt64
712+
new = Array{T}(undef, length(array))
713+
copyto_typed!(new, array)
714+
elseif T === Float64
715+
new = Array{T}(undef, length(array))
716+
copyto_typed!(new, array)
717+
elseif T === Union{}
718+
new = Union{}[]
719+
elseif (T === TOMLDict) || (T == BigInt) || (T === UInt128) || (T === Int128) || (T <: Vector) ||
720+
(T === Dates.Date) || (T === Dates.Time) || (T === Dates.DateTime)
721+
# do nothing, leave as Vector{Any}
722+
new = array
723+
else @assert false end
724+
push!(l.static_arrays, new)
725+
return new
717726
end
718727

719728

@@ -1025,10 +1034,9 @@ function parse_datetime(l)
10251034
end
10261035

10271036
function try_return_datetime(p::Parser{Dates}, year, month, day, h, m, s, ms) where Dates
1028-
if Dates !== nothing || p.Dates !== nothing
1029-
mod = Dates !== nothing ? Dates : p.Dates
1037+
if Dates !== nothing
10301038
try
1031-
return mod.DateTime(year, month, day, h, m, s, ms)
1039+
return Dates.DateTime(year, month, day, h, m, s, ms)
10321040
catch ex
10331041
ex isa ArgumentError && return ParserError(ErrParsingDateTime)
10341042
rethrow()
@@ -1039,10 +1047,9 @@ function try_return_datetime(p::Parser{Dates}, year, month, day, h, m, s, ms) wh
10391047
end
10401048

10411049
function try_return_date(p::Parser{Dates}, year, month, day) where Dates
1042-
if Dates !== nothing || p.Dates !== nothing
1043-
mod = Dates !== nothing ? Dates : p.Dates
1050+
if Dates !== nothing
10441051
try
1045-
return mod.Date(year, month, day)
1052+
return Dates.Date(year, month, day)
10461053
catch ex
10471054
ex isa ArgumentError && return ParserError(ErrParsingDateTime)
10481055
rethrow()
@@ -1062,10 +1069,9 @@ function parse_local_time(l::Parser)
10621069
end
10631070

10641071
function try_return_time(p::Parser{Dates}, h, m, s, ms) where Dates
1065-
if Dates !== nothing || p.Dates !== nothing
1066-
mod = Dates !== nothing ? Dates : p.Dates
1072+
if Dates !== nothing
10671073
try
1068-
return mod.Time(h, m, s, ms)
1074+
return Dates.Time(h, m, s, ms)
10691075
catch ex
10701076
ex isa ArgumentError && return ParserError(ErrParsingDateTime)
10711077
rethrow()

stdlib/TOML/test/values.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,6 @@ end
172172
@testset "Array" begin
173173
@test testval("[1,2,3]", Int64[1,2,3])
174174
@test testval("[1.0, 2.0, 3.0]", Float64[1.0, 2.0, 3.0])
175-
@test testval("[1.0, 2.0, 3]", Union{Int64, Float64}[1.0, 2.0, Int64(3)])
175+
@test testval("[1.0, 2.0, 3]", Any[1.0, 2.0, Int64(3)])
176176
@test testval("[1.0, 2, \"foo\"]", Any[1.0, Int64(2), "foo"])
177177
end

0 commit comments

Comments
 (0)