Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "0.1.0"
[deps]
CircularArrayBuffers = "9de3a189-e0c0-4e15-ba3b-b14b9fb0aec1"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
OnlineStats = "a15396b6-48d5-5d58-9928-6d29437db91e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StackViews = "cae243ae-269e-4f55-b966-ac2d0dc13c15"

Expand All @@ -13,6 +14,7 @@ CircularArrayBuffers = "0.1"
MacroTools = "0.5"
StackViews = "0.1"
julia = "1.6"
OnlineStats = "1.0"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
2 changes: 1 addition & 1 deletion src/ReinforcementLearningTrajectories.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ const RLTrajectories = ReinforcementLearningTrajectories
export RLTrajectories

include("patch.jl")

include("traces.jl")
include("samplers.jl")
include("controllers.jl")
include("trajectory.jl")
include("normalization.jl")
include("common/common.jl")

end
198 changes: 198 additions & 0 deletions src/normalization.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
import OnlineStats: OnlineStats, Group, Moments, fit!, OnlineStat, Weight, EqualWeight, mean, std
export scalar_normalizer, array_normalizer, NormalizedTraces, Normalizer
import MacroTools.@forward

"""
    Normalizer(::OnlineStat)

Wraps an OnlineStat to be used by a [`NormalizedTraces`](@ref).
"""
struct Normalizer{OS<:OnlineStat}
    os::OS # the wrapped running statistic (e.g. a `Moments`, or a `Group` of them)
end

@forward Normalizer.os OnlineStats.mean, OnlineStats.std, Base.iterate, normalize, Base.length

"""
    fit!(n::Normalizer, data::AbstractArray)

Update the wrapped statistic with `data`, treating the last dimension as the
batch dimension: each slice along the last dimension is flattened and fitted
individually. Returns `n`.
"""
function OnlineStats.fit!(n::Normalizer, data::AbstractArray)
    foreach(eachslice(data, dims = ndims(data))) do batch_element
        fit!(n.os, vec(batch_element))
    end
    return n
end

# A `Group` tracks one statistic per element of the observation, so a whole
# vector observation is fitted at once (element i of `y` updates statistic i).
function OnlineStats.fit!(n::Normalizer{<:Group}, y::AbstractVector)
    fit!(n.os, y)
    n
end

# Fallback for an iterable of array-like observations: flatten each element
# and fit it into the wrapped statistic. Returns `n`.
function OnlineStats.fit!(n::Normalizer, y)
    foreach(observation -> fit!(n.os, vec(observation)), y)
    return n
end

# Scalar trace batch: fit each number individually into the univariate `Moments`.
# Returns `n`.
function OnlineStats.fit!(n::Normalizer{<:Moments}, y::AbstractVector{<:Number})
    foreach(observation -> fit!(n.os, observation), y)
    return n
end

# Single scalar observation: update the wrapped statistic directly.
function OnlineStats.fit!(n::Normalizer, data::Number)
    fit!(n.os, data)
    n
end

"""
    normalize(os::Moments, x)

Given a `Moments` estimate of the elements of `x`, a vector of scalar traces,
normalize `x` elementwise to zero mean and unit variance. Returns a new
array; `x` is not mutated.
"""
function normalize(os::Moments, x)
    μ = convert(eltype(x), mean(os))
    σ = convert(eltype(x), std(os))
    return (x .- μ) ./ σ
end

"""
    normalize(os::Group{<:AbstractVector{<:Moments}}, x)

Given a multivariate estimator of the moments of each element of `x`,
normalize each element of `x` to zero mean and unit variance. Treats the
last dimension as a batch dimension if `ndims(x) >= 2`.
"""
function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractVector)
    T = eltype(x)
    # Typed comprehensions convert every estimate to the element type of `x`.
    μ = T[mean(stat) for stat in os]
    σ = T[std(stat) for stat in os]
    return (x .- μ) ./ σ
end

"""
    normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractArray)

Normalize a batched array trace: the last dimension of `x` is treated as the
batch dimension, and each slice along it is flattened, normalized elementwise
with `os`, and written into a fresh array of the same shape. `x` is not mutated.
"""
function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractArray)
    xn = similar(x)
    # A tuple of `Colon`s is type-stable and allocation-free, unlike the
    # `repeat([:], ndims(x) - 1)` idiom which builds a temporary `Vector{Colon}`
    # on every call.
    cols = ntuple(_ -> Colon(), ndims(x) - 1)
    for (i, slice) in enumerate(eachslice(x, dims = ndims(x)))
        xn[cols..., i] .= reshape(normalize(os, vec(slice)), size(x)[1:end-1]...)
    end
    return xn
end

# Vector-of-arrays trace (e.g. a sampled batch of states): normalize each
# element independently after flattening it. Returns a new vector.
function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractVector{<:AbstractArray})
    out = similar(x)
    for idx in eachindex(x)
        out[idx] = normalize(os, vec(x[idx]))
    end
    return out
end

"""
    scalar_normalizer(; weight::Weight = OnlineStats.EqualWeight())

Returns preconfigured normalizer for scalar traces such as rewards. By default, all samples
have equal weights in the computation of the moments.
See the [OnlineStats documentation](https://joshday.github.io/OnlineStats.jl/stable/weights/)
to use variants such as exponential weights to favor the most recent observations.
"""
scalar_normalizer(; weight::Weight = EqualWeight()) = Normalizer(Moments(weight = weight))

"""
    array_normalizer(size::NTuple{N,Int}; weight::Weight = OnlineStats.EqualWeight())

Returns preconfigured normalizer for array traces such as vector or matrix states.
`size` is a tuple containing the dimension sizes of a state. E.g. `(10,)` for a 10-element
vector, or `(252,252)` for a square image.
By default, all samples have equal weights in the computation of the moments.
See the [OnlineStats documentation](https://joshday.github.io/OnlineStats.jl/stable/weights/)
to use variants such as exponential weights to favor the most recent observations.
"""
array_normalizer(size::NTuple{N,Int}; weight::Weight = EqualWeight()) where N = Normalizer(Group([Moments(weight = weight) for _ in 1:prod(size)]))

"""
    NormalizedTraces(traces::AbstractTraces, normalizers::NamedTuple)
    NormalizedTraces(traces::AbstractTraces; trace_normalizer_pairs...)

Wraps an [`AbstractTraces`](@ref) and a `NamedTuple` of `Symbol` => [`Normalizer`](@ref)
pairs.
When pushing new elements to the traces, a `NormalizedTraces` will first update a running
estimate of the moments of traces present in the keys of `normalizers`.
When sampling a normalized trace, it will first normalize the samples to zero mean and unit
variance. Traces that do not have a normalizer are sampled as usual.

Note that when used in combination with [`Episodes`](@ref), `NormalizedTraces` must wrap
the `Episodes` struct, not the inner `AbstractTraces` contained in an `Episode`, otherwise
the running estimate will reset after each episode.

When used with a MultiplexTraces, the normalizer used for one symbol (e.g. :state) will
be the same used for the other one (e.g. :next_state).

Preconfigured normalizers are provided for scalar (see [`scalar_normalizer`](@ref)) and
arrays (see [`array_normalizer`](@ref)).

# Examples
```
t = CircularArraySARTTraces(capacity = 10, state = Float64 => (5,))
nt = NormalizedTraces(t, reward = scalar_normalizer(), state = array_normalizer((5,)))
# :next_state will also be normalized.
traj = Trajectory(
    container = nt,
    sampler = BatchSampler(10)
)
```
"""
struct NormalizedTraces{names, TT, T <: AbstractTraces{names, TT}, normnames, N} <: AbstractTraces{names, TT}
    traces::T # the wrapped traces; reads and writes are forwarded to it
    normalizers::NamedTuple{normnames, N} # one `Normalizer` per normalized trace key
end

# Build a `NormalizedTraces` from keyword `key => Normalizer` pairs, completing
# the pairs so that both keys of any `MultiplexTraces` share the same normalizer.
function NormalizedTraces(traces::AbstractTraces{names, TT}; trace_normalizer_pairs...) where names where TT
    for key in keys(trace_normalizer_pairs)
        # NOTE(review): `@assert` may be disabled at higher optimization levels;
        # consider `throw(ArgumentError(...))` for this user-input validation.
        @assert key in keys(traces) "Traces do not have key $key, valid keys are $(keys(traces))."
    end
    nt = (; trace_normalizer_pairs...)
    for trace in traces.traces
        #check if all traces of MultiplexTraces are in pairs
        if trace isa MultiplexTraces
            if length(intersect(keys(trace), keys(trace_normalizer_pairs))) in [0, length(keys(trace))] #check if none or all keys are in normalizers
                continue
            else #if not then one is missing
                # A MultiplexTraces has exactly two keys, so `only` is safe here.
                present_key = only(intersect(keys(trace), keys(trace_normalizer_pairs)))
                absent_key = only(setdiff(keys(trace), keys(trace_normalizer_pairs)))
                nt = merge(nt, (;(absent_key => nt[present_key],)...)) #assign the same normalizer
            end
        end
    end
    NormalizedTraces{names, TT, typeof(traces), keys(nt), typeof(values(nt))}(traces, nt)
end

# Multi-line REPL display: one line per trace, marking the normalized ones.
function Base.show(io::IO, ::MIME"text/plain", t::NormalizedTraces{names,T}) where {names,T}
    println(io, "$(nameof(typeof(t))) with $(length(names)) entries:")
    for trace_name in names
        suffix = trace_name in keys(t.normalizers) ? " => Normalized" : ""
        println(io, " :$trace_name => $(summary(t[trace_name]))$suffix")
    end
end

@forward NormalizedTraces.traces Base.length, Base.size, Base.lastindex, Base.firstindex, Base.getindex, Base.view, Base.pop!, Base.popfirst!, Base.empty!, Base.parent

# Metaprogrammed insertion methods: before delegating to the wrapped traces,
# update the running statistics of every normalized key present in `x`.
for f in (:push!, :pushfirst!, :append!, :prepend!)
    @eval function Base.$f(nt::NormalizedTraces, x::NamedTuple)
        for key in intersect(keys(nt.normalizers), keys(x))
            fit!(nt.normalizers[key], x[key])
        end
        $f(nt.traces, x)
    end
end

# Uniformly sample a batch, normalizing every trace that has a registered
# normalizer before handing the data to the sampler's transformer.
function sample(s::BatchSampler, nt::NormalizedTraces, names)
    batch_indices = rand(s.rng, 1:length(nt), s.batch_size)
    function maybe_normalized(key)
        data = nt[key][batch_indices]
        return key in keys(nt.normalizers) ? normalize(nt.normalizers[key], data) : data
    end
    return NamedTuple{names}(s.transformer(maybe_normalized(key) for key in names))
end
2 changes: 1 addition & 1 deletion src/rendering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ function inner_convert(::Type{Term.AbstractRenderable}, x; style="gray1", width=
end

Base.convert(T::Type{Term.AbstractRenderable}, t::Trace{<:AbstractArray}; kw...) = convert(T, Trace(collect(eachslice(t.x, dims=ndims(t.x)))); kw..., type=typeof(t), subtitle="size: $(size(t.x))")

Base.convert(T::Type{Term.AbstractRenderable}, t::NormalizedTrace; kw...) = convert(T, t.trace; kw..., type = typeof(t))
function Base.convert(
::Type{Term.AbstractRenderable},
t::Trace{<:AbstractVector};
Expand Down
21 changes: 14 additions & 7 deletions src/samplers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ end

"""
BatchSampler{names}(;batch_size, rng=Random.GLOBAL_RNG, transformer=identity)
BatchSampler{names}(batch_size ;rng=Random.GLOBAL_RNG, transformer=identity)

Uniformly sample a batch of examples for each trace specified in `names`. By default, all the traces will be sampled.
Uniformly sample a batch of examples for each trace specified in `names`.
By default, all the traces will be sampled.

See also [`sample`](@ref).
"""
Expand All @@ -27,18 +29,20 @@ sample(s::BatchSampler{names}, t::AbstractTraces) where {names} = sample(s, t, n

function sample(s::BatchSampler, t::AbstractTraces, names)
inds = rand(s.rng, 1:length(t), s.batch_size)
NamedTuple{names}(s.transformer(t[x][inds]) for x in names)
NamedTuple{names}(s.transformer(t[x][inds] for x in names))
end

"""
MetaSampler(::NamedTuple)

Wraps a NamedTuple containing multiple samplers. When sampled, returns a named tuple with a batch from each sampler.
Wraps a NamedTuple containing multiple samplers. When sampled, returns a named tuple with a
batch from each sampler.
Used internally for algorithms that sample multiple times per epoch.

# Example

```
MetaSampler(policy = BatchSampler(10), critic = BatchSampler(100))
```
"""
struct MetaSampler{names,T} <: AbstractSampler
samplers::NamedTuple{names,T}
Expand All @@ -52,11 +56,14 @@ sample(s::MetaSampler, t) = map(x -> sample(x, t), s.samplers)
"""
MultiBatchSampler(sampler, n)

Wraps a sampler. When sampled, will sample n batches using sampler. Useful in combination with MetaSampler to allow different sampling rates between samplers.
Wraps a sampler. When sampled, will sample n batches using sampler. Useful in combination
with MetaSampler to allow different sampling rates between samplers.

# Example

MetaSampler(policy = MultiBatchSampler(BatchSampler(10), 3), critic = MultiBatchSampler(BatchSampler(100), 5))
```
MetaSampler(policy = MultiBatchSampler(BatchSampler(10), 3),
critic = MultiBatchSampler(BatchSampler(100), 5))
```
"""
struct MultiBatchSampler{S<:AbstractSampler} <: AbstractSampler
sampler::S
Expand Down
2 changes: 1 addition & 1 deletion src/traces.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ end

function MultiplexTraces{names}(t) where {names}
if length(names) != 2
throw(ArgumentError("MultiplexTraces has exactly two sub traces, got $length(names) trace names"))
throw(ArgumentError("MultiplexTraces has exactly two sub traces, got $(length(names)) trace names"))
end
trace = convert(AbstractTrace, t)
MultiplexTraces{names,typeof(trace),eltype(trace)}(trace)
Expand Down
41 changes: 41 additions & 0 deletions test/normalization.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
using Test
using ReinforcementLearningTrajectories
import ReinforcementLearningTrajectories: sample
import OnlineStats: mean, std

@testset "normalization.jl" begin
    t = CircularArraySARTTraces(capacity = 10, state = Float64 => (5,))
    nt = NormalizedTraces(t, reward = scalar_normalizer(), state = array_normalizer((5,)))
    # Closed-form expected moments: rewards and state elements visit 0:4;
    # `ss` additionally accounts for the extra `m`-valued state observation
    # pushed via :next_state below.
    m = mean(0:4)
    s = std(0:4)
    ss = std([0,1,2,2,3,4])
    # Push 5 transitions whose state vectors are permutations of 0:4.
    for i in 0:4
        r = ((1.0:5.0) .+ i) .% 5
        push!(nt, (state = [r;], action = 1, reward = Float32(i), terminal = false))
    end
    push!(nt, (next_state = fill(m, 5), next_action = 1)) #this also updates state moments

    # Running estimates must match the closed-form moments exactly.
    @test mean(nt.normalizers[:reward].os) == m && std(nt.normalizers[:reward].os) == s
    @test all(nt.normalizers[:state].os) do moments
        mean(moments) == m && std(moments) == ss
    end

    # Indexing the inner traces directly must stay unnormalized.
    unnormalized_batch = t[[1:5;]]
    @test unnormalized_batch[:reward] == [0:4;]
    @test extrema(unnormalized_batch[:state]) == (0, 4)
    normalized_batch = nt[[1:5;]]

    traj = Trajectory(
        container = nt,
        sampler = BatchSampler(1000),
        controller = InsertSampleRatioController(ratio = Inf, threshold = 0)
    )
    # Sampling through the trajectory must return normalized values.
    normalized_batch = sample(traj)
    @test all(extrema(normalized_batch[:state]) .≈ ((0, 4) .- m)./ss)
    @test all(extrema(normalized_batch[:next_state]) .≈ ((0, 4) .- m)./ss)
    @test all(extrema(normalized_batch[:reward]) .≈ ((0, 4) .- m)./s)
    #check for no mutation
    unnormalized_batch = t[[1:5;]]
    @test unnormalized_batch[:reward] == [0:4;]
    @test extrema(unnormalized_batch[:state]) == (0, 4)
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ using Test
include("common.jl")
include("samplers.jl")
include("trajectories.jl")
include("normalization.jl")
include("samplers.jl")
end