Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "0.1.0"
[deps]
CircularArrayBuffers = "9de3a189-e0c0-4e15-ba3b-b14b9fb0aec1"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
OnlineStats = "a15396b6-48d5-5d58-9928-6d29437db91e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StackViews = "cae243ae-269e-4f55-b966-ac2d0dc13c15"

Expand All @@ -13,6 +14,7 @@ CircularArrayBuffers = "0.1"
MacroTools = "0.5"
StackViews = "0.1"
julia = "1.6"
OnlineStats = "1.0"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
2 changes: 1 addition & 1 deletion src/ReinforcementLearningTrajectories.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ const RLTrajectories = ReinforcementLearningTrajectories
export RLTrajectories

include("patch.jl")

include("traces.jl")
include("samplers.jl")
include("controllers.jl")
include("trajectory.jl")
include("normalization.jl")
include("common/common.jl")

end
198 changes: 198 additions & 0 deletions src/normalization.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
import OnlineStats: OnlineStats, Group, Moments, fit!, OnlineStat, Weight, EqualWeight, mean, std
export scalar_normalizer, array_normalizer, NormalizedTraces, Normalizer
import MacroTools.@forward

"""
    Normalizer(::OnlineStat)

Wraps an OnlineStat to be used by a [`NormalizedTraces`](@ref).
"""
struct Normalizer{OS<:OnlineStat}
    os::OS # the wrapped running statistic (e.g. a `Moments`, or a `Group` of them)
end

@forward Normalizer.os OnlineStats.mean, OnlineStats.std, Base.iterate, normalize, Base.length

"""
    fit!(n::Normalizer, data::AbstractArray)

Update the wrapped statistic with `data`, treating the last dimension as the
batch dimension: each slice along the last dimension is flattened and fitted
individually. Returns `n`.
"""
function OnlineStats.fit!(n::Normalizer, data::AbstractArray)
    foreach(eachslice(data, dims = ndims(data))) do batch_element
        fit!(n.os, vec(batch_element))
    end
    return n
end

# A `Group` tracks one statistic per element of the observation, so a whole
# vector observation is fitted at once (element i of `y` updates statistic i).
function OnlineStats.fit!(n::Normalizer{<:Group}, y::AbstractVector)
    fit!(n.os, y)
    n
end

# Fallback for an iterable of array-like observations: flatten each element
# and fit it into the wrapped statistic. Returns `n`.
function OnlineStats.fit!(n::Normalizer, y)
    foreach(observation -> fit!(n.os, vec(observation)), y)
    return n
end

# Scalar trace batch: fit each number individually into the univariate `Moments`.
# Returns `n`.
function OnlineStats.fit!(n::Normalizer{<:Moments}, y::AbstractVector{<:Number})
    foreach(observation -> fit!(n.os, observation), y)
    return n
end

# Single scalar observation: update the wrapped statistic directly.
function OnlineStats.fit!(n::Normalizer, data::Number)
    fit!(n.os, data)
    n
end

"""
    normalize(os::Moments, x)

Given a `Moments` estimate of the elements of `x`, a vector of scalar traces,
normalize `x` elementwise to zero mean and unit variance. Returns a new
array; `x` is not mutated.
"""
function normalize(os::Moments, x)
    μ = convert(eltype(x), mean(os))
    σ = convert(eltype(x), std(os))
    return (x .- μ) ./ σ
end

"""
    normalize(os::Group{<:AbstractVector{<:Moments}}, x)

Given a multivariate estimator of the moments of each element of `x`,
normalize each element of `x` to zero mean and unit variance. Treats the
last dimension as a batch dimension if `ndims(x) >= 2`.
"""
function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractVector)
    T = eltype(x)
    # Typed comprehensions convert every estimate to the element type of `x`.
    μ = T[mean(stat) for stat in os]
    σ = T[std(stat) for stat in os]
    return (x .- μ) ./ σ
end

"""
    normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractArray)

Normalize a batched array trace: the last dimension of `x` is treated as the
batch dimension, and each slice along it is flattened, normalized elementwise
with `os`, and written into a fresh array of the same shape. `x` is not mutated.
"""
function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractArray)
    xn = similar(x)
    # A tuple of `Colon`s is type-stable and allocation-free, unlike the
    # `repeat([:], ndims(x) - 1)` idiom which builds a temporary `Vector{Colon}`
    # on every call.
    cols = ntuple(_ -> Colon(), ndims(x) - 1)
    for (i, slice) in enumerate(eachslice(x, dims = ndims(x)))
        xn[cols..., i] .= reshape(normalize(os, vec(slice)), size(x)[1:end-1]...)
    end
    return xn
end

# Vector-of-arrays trace (e.g. a sampled batch of states): normalize each
# element independently after flattening it. Returns a new vector.
function normalize(os::Group{<:AbstractVector{<:Moments}}, x::AbstractVector{<:AbstractArray})
    out = similar(x)
    for idx in eachindex(x)
        out[idx] = normalize(os, vec(x[idx]))
    end
    return out
end

"""
    scalar_normalizer(; weight::Weight = OnlineStats.EqualWeight())

Returns preconfigured normalizer for scalar traces such as rewards. By default, all samples
have equal weights in the computation of the moments.
See the [OnlineStats documentation](https://joshday.github.io/OnlineStats.jl/stable/weights/)
to use variants such as exponential weights to favor the most recent observations.
"""
scalar_normalizer(; weight::Weight = EqualWeight()) = Normalizer(Moments(weight = weight))

"""
    array_normalizer(size::NTuple{N,Int}; weight::Weight = OnlineStats.EqualWeight())

Returns preconfigured normalizer for array traces such as vector or matrix states.
`size` is a tuple containing the dimension sizes of a state. E.g. `(10,)` for a 10-element
vector, or `(252,252)` for a square image.
By default, all samples have equal weights in the computation of the moments.
See the [OnlineStats documentation](https://joshday.github.io/OnlineStats.jl/stable/weights/)
to use variants such as exponential weights to favor the most recent observations.
"""
array_normalizer(size::NTuple{N,Int}; weight::Weight = EqualWeight()) where N = Normalizer(Group([Moments(weight = weight) for _ in 1:prod(size)]))

"""
    NormalizedTraces(traces::AbstractTraces, normalizers::NamedTuple)
    NormalizedTraces(traces::AbstractTraces; trace_normalizer_pairs...)

Wraps an [`AbstractTraces`](@ref) and a `NamedTuple` of `Symbol` => [`Normalizer`](@ref)
pairs.
When pushing new elements to the traces, a `NormalizedTraces` will first update a running
estimate of the moments of traces present in the keys of `normalizers`.
When sampling a normalized trace, it will first normalize the samples to zero mean and unit
variance. Traces that do not have a normalizer are sampled as usual.

Note that when used in combination with [`Episodes`](@ref), `NormalizedTraces` must wrap
the `Episodes` struct, not the inner `AbstractTraces` contained in an `Episode`, otherwise
the running estimate will reset after each episode.

When used with a MultiplexTraces, the normalizer used for one symbol (e.g. :state) will
be the same used for the other one (e.g. :next_state).

Preconfigured normalizers are provided for scalar (see [`scalar_normalizer`](@ref)) and
arrays (see [`array_normalizer`](@ref)).

# Examples
```
t = CircularArraySARTTraces(capacity = 10, state = Float64 => (5,))
nt = NormalizedTraces(t, reward = scalar_normalizer(), state = array_normalizer((5,)))
# :next_state will also be normalized.
traj = Trajectory(
    container = nt,
    sampler = BatchSampler(10)
)
```
"""
struct NormalizedTraces{names, TT, T <: AbstractTraces{names, TT}, normnames, N} <: AbstractTraces{names, TT}
    traces::T # the wrapped traces; reads and writes are forwarded to it
    normalizers::NamedTuple{normnames, N} # one `Normalizer` per normalized trace key
end

# Build a `NormalizedTraces` from keyword `key => Normalizer` pairs, completing
# the pairs so that both keys of any `MultiplexTraces` share the same normalizer.
function NormalizedTraces(traces::AbstractTraces{names, TT}; trace_normalizer_pairs...) where names where TT
    for key in keys(trace_normalizer_pairs)
        # NOTE(review): `@assert` may be disabled at higher optimization levels;
        # consider `throw(ArgumentError(...))` for this user-input validation.
        @assert key in keys(traces) "Traces do not have key $key, valid keys are $(keys(traces))."
    end
    nt = (; trace_normalizer_pairs...)
    for trace in traces.traces
        #check if all traces of MultiplexTraces are in pairs
        if trace isa MultiplexTraces
            if length(intersect(keys(trace), keys(trace_normalizer_pairs))) in [0, length(keys(trace))] #check if none or all keys are in normalizers
                continue
            else #if not then one is missing
                # A MultiplexTraces has exactly two keys, so `only` is safe here.
                present_key = only(intersect(keys(trace), keys(trace_normalizer_pairs)))
                absent_key = only(setdiff(keys(trace), keys(trace_normalizer_pairs)))
                nt = merge(nt, (;(absent_key => nt[present_key],)...)) #assign the same normalizer
            end
        end
    end
    NormalizedTraces{names, TT, typeof(traces), keys(nt), typeof(values(nt))}(traces, nt)
end

# Multi-line REPL display: one line per trace, marking the normalized ones.
function Base.show(io::IO, ::MIME"text/plain", t::NormalizedTraces{names,T}) where {names,T}
    println(io, "$(nameof(typeof(t))) with $(length(names)) entries:")
    for trace_name in names
        suffix = trace_name in keys(t.normalizers) ? " => Normalized" : ""
        println(io, " :$trace_name => $(summary(t[trace_name]))$suffix")
    end
end

@forward NormalizedTraces.traces Base.length, Base.size, Base.lastindex, Base.firstindex, Base.getindex, Base.view, Base.pop!, Base.popfirst!, Base.empty!, Base.parent

# Metaprogrammed insertion methods: before delegating to the wrapped traces,
# update the running statistics of every normalized key present in `x`.
for f in (:push!, :pushfirst!, :append!, :prepend!)
    @eval function Base.$f(nt::NormalizedTraces, x::NamedTuple)
        for key in intersect(keys(nt.normalizers), keys(x))
            fit!(nt.normalizers[key], x[key])
        end
        $f(nt.traces, x)
    end
end

# Uniformly sample a batch, normalizing every trace that has a registered
# normalizer before handing the data to the sampler's transformer.
function sample(s::BatchSampler, nt::NormalizedTraces, names)
    batch_indices = rand(s.rng, 1:length(nt), s.batch_size)
    function maybe_normalized(key)
        data = nt[key][batch_indices]
        return key in keys(nt.normalizers) ? normalize(nt.normalizers[key], data) : data
    end
    return NamedTuple{names}(s.transformer(maybe_normalized(key) for key in names))
end
2 changes: 1 addition & 1 deletion src/rendering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ function inner_convert(::Type{Term.AbstractRenderable}, x; style="gray1", width=
end

Base.convert(T::Type{Term.AbstractRenderable}, t::Trace{<:AbstractArray}; kw...) = convert(T, Trace(collect(eachslice(t.x, dims=ndims(t.x)))); kw..., type=typeof(t), subtitle="size: $(size(t.x))")

Base.convert(T::Type{Term.AbstractRenderable}, t::NormalizedTrace; kw...) = convert(T, t.trace; kw..., type = typeof(t))
function Base.convert(
::Type{Term.AbstractRenderable},
t::Trace{<:AbstractVector};
Expand Down
21 changes: 14 additions & 7 deletions src/samplers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ end

"""
BatchSampler{names}(;batch_size, rng=Random.GLOBAL_RNG, transformer=identity)
BatchSampler{names}(batch_size ;rng=Random.GLOBAL_RNG, transformer=identity)

Uniformly sample a batch of examples for each trace specified in `names`. By default, all the traces will be sampled.
Uniformly sample a batch of examples for each trace specified in `names`.
By default, all the traces will be sampled.

See also [`sample`](@ref).
"""
Expand All @@ -27,18 +29,20 @@ sample(s::BatchSampler{names}, t::AbstractTraces) where {names} = sample(s, t, n

function sample(s::BatchSampler, t::AbstractTraces, names)
inds = rand(s.rng, 1:length(t), s.batch_size)
NamedTuple{names}(s.transformer(t[x][inds]) for x in names)
NamedTuple{names}(s.transformer(t[x][inds] for x in names))
end

"""
MetaSampler(::NamedTuple)

Wraps a NamedTuple containing multiple samplers. When sampled, returns a named tuple with a batch from each sampler.
Wraps a NamedTuple containing multiple samplers. When sampled, returns a named tuple with a
batch from each sampler.
Used internally for algorithms that sample multiple times per epoch.

# Example

```
MetaSampler(policy = BatchSampler(10), critic = BatchSampler(100))
```
"""
struct MetaSampler{names,T} <: AbstractSampler
samplers::NamedTuple{names,T}
Expand All @@ -52,11 +56,14 @@ sample(s::MetaSampler, t) = map(x -> sample(x, t), s.samplers)
"""
MultiBatchSampler(sampler, n)

Wraps a sampler. When sampled, will sample n batches using sampler. Useful in combination with MetaSampler to allow different sampling rates between samplers.
Wraps a sampler. When sampled, will sample n batches using sampler. Useful in combination
with MetaSampler to allow different sampling rates between samplers.

# Example

MetaSampler(policy = MultiBatchSampler(BatchSampler(10), 3), critic = MultiBatchSampler(BatchSampler(100), 5))
```
MetaSampler(policy = MultiBatchSampler(BatchSampler(10), 3),
critic = MultiBatchSampler(BatchSampler(100), 5))
```
"""
struct MultiBatchSampler{S<:AbstractSampler} <: AbstractSampler
sampler::S
Expand Down
2 changes: 1 addition & 1 deletion src/traces.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ end

function MultiplexTraces{names}(t) where {names}
if length(names) != 2
throw(ArgumentError("MultiplexTraces has exactly two sub traces, got $length(names) trace names"))
throw(ArgumentError("MultiplexTraces has exactly two sub traces, got $(length(names)) trace names"))
end
trace = convert(AbstractTrace, t)
MultiplexTraces{names,typeof(trace),eltype(trace)}(trace)
Expand Down
41 changes: 41 additions & 0 deletions test/normalization.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
using Test
using ReinforcementLearningTrajectories
import ReinforcementLearningTrajectories: sample
import OnlineStats: mean, std

@testset "normalization.jl" begin
    t = CircularArraySARTTraces(capacity = 10, state = Float64 => (5,))
    nt = NormalizedTraces(t, reward = scalar_normalizer(), state = array_normalizer((5,)))
    # Closed-form expected moments: rewards and state elements visit 0:4;
    # `ss` additionally accounts for the extra `m`-valued state observation
    # pushed via :next_state below.
    m = mean(0:4)
    s = std(0:4)
    ss = std([0,1,2,2,3,4])
    # Push 5 transitions whose state vectors are permutations of 0:4.
    for i in 0:4
        r = ((1.0:5.0) .+ i) .% 5
        push!(nt, (state = [r;], action = 1, reward = Float32(i), terminal = false))
    end
    push!(nt, (next_state = fill(m, 5), next_action = 1)) #this also updates state moments

    # Running estimates must match the closed-form moments exactly.
    @test mean(nt.normalizers[:reward].os) == m && std(nt.normalizers[:reward].os) == s
    @test all(nt.normalizers[:state].os) do moments
        mean(moments) == m && std(moments) == ss
    end

    # Indexing the inner traces directly must stay unnormalized.
    unnormalized_batch = t[[1:5;]]
    @test unnormalized_batch[:reward] == [0:4;]
    @test extrema(unnormalized_batch[:state]) == (0, 4)
    normalized_batch = nt[[1:5;]]

    traj = Trajectory(
        container = nt,
        sampler = BatchSampler(1000),
        controller = InsertSampleRatioController(ratio = Inf, threshold = 0)
    )
    # Sampling through the trajectory must return normalized values.
    normalized_batch = sample(traj)
    @test all(extrema(normalized_batch[:state]) .≈ ((0, 4) .- m)./ss)
    @test all(extrema(normalized_batch[:next_state]) .≈ ((0, 4) .- m)./ss)
    @test all(extrema(normalized_batch[:reward]) .≈ ((0, 4) .- m)./s)
    #check for no mutation
    unnormalized_batch = t[[1:5;]]
    @test unnormalized_batch[:reward] == [0:4;]
    @test extrema(unnormalized_batch[:state]) == (0, 4)
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ using Test
include("common.jl")
include("samplers.jl")
include("trajectories.jl")
include("normalization.jl")
include("samplers.jl")
end