@@ -1,17 +1,18 @@
using .PyCall


np = pyimport("numpy")

export PettingzooEnv
export PettingZooEnv


"""
PettingzooEnv(;kwargs...)
PettingZooEnv(name::String; seed=123, kwargs...)

`PettingzooEnv` is an interface of the python library pettingzoo for multi agent reinforcement learning environments. It can be used to test multi
`PettingZooEnv` is an interface to the Python library PettingZoo for multi-agent reinforcement learning environments. It can be used to test multi-
agent reinforcement learning algorithms implemented in ReinforcementLearning.jl.
"""
function PettingzooEnv(name::String; seed=123, args...)

function PettingZooEnv(name::String; seed=123, args...)
if !PyCall.pyexists("pettingzoo.$name")
error("Cannot import pettingzoo.$name")
end
@@ -20,7 +21,7 @@ function PettingzooEnv(name::String; seed=123, args...)
pyenv.reset(seed=seed)
obs_space = space_transform(pyenv.observation_space(pyenv.agents[1]))
act_space = space_transform(pyenv.action_space(pyenv.agents[1]))
env = PettingzooEnv{typeof(act_space),typeof(obs_space),typeof(pyenv)}(
env = PettingZooEnv{typeof(act_space),typeof(obs_space),typeof(pyenv)}(
pyenv,
obs_space,
act_space,
@@ -33,13 +34,12 @@ end

# basic functions needed for simulation ========================================================================

function RLBase.reset!(env::PettingzooEnv)
function RLBase.reset!(env::PettingZooEnv)
pycall!(env.state, env.pyenv.reset, PyObject, env.seed)
env.ts = 1
nothing
end

function RLBase.is_terminated(env::PettingzooEnv)
function RLBase.is_terminated(env::PettingZooEnv)
_, _, t, d, _ = pycall(env.pyenv.last, PyObject)
t || d
end
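
For reference, PettingZoo's AEC API has `last()` return a 5-tuple of (observation, reward, termination, truncation, info) for the agent about to act, and the wrapper above treats either flag as terminal. A minimal hand-rolled sketch of the same check over PyCall, assuming an already-constructed `env`:

# unpack last(); `done` mirrors what is_terminated(env) computes
obs, reward, termination, truncation, info = env.pyenv.last()
done = termination || truncation
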
@@ -48,96 +48,96 @@ end

## State / observation implementations ========================================================================

RLBase.state(env::PettingzooEnv, ::Observation{Any}, players::Tuple) = Dict(p => state(env, p) for p in players)
RLBase.state(env::PettingZooEnv, ::Observation{Any}, players::Tuple) = Dict(p => state(env, p) for p in players)


# partial observability is the default for PettingZoo
function RLBase.state(env::PettingzooEnv, ::Observation{Any}, player)
function RLBase.state(env::PettingZooEnv, ::Observation{Any}, player)
env.pyenv.observe(player)
end


## state space =========================================================================================================================================

RLBase.state_space(env::PettingzooEnv, ::Observation{Any}, players) = Space(Dict(player => state_space(env, player) for player in players))
RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, players) = Space(Dict(player => state_space(env, player) for player in players))

# partial observability
RLBase.state_space(env::PettingzooEnv, ::Observation{Any}, player::String) = space_transform(env.pyenv.observation_space(player))
RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, player::Symbol) = space_transform(env.pyenv.observation_space(String(player)))

# for full observability. Be careful: action_space also has to be adjusted
# RLBase.state_space(env::PettingzooEnv, ::Observation{Any}, player::String) = space_transform(env.pyenv.state_space)
# RLBase.state_space(env::PettingZooEnv, ::Observation{Any}, player::String) = space_transform(env.pyenv.state_space)
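
Since partial observability is the default, both the state and its space are queried per player; a small usage sketch (the agent name `:player_1` is only illustrative, and `Observation{Any}()` is RLBase's default state style):

# per-player observation space and current observation
ss = RLBase.state_space(env, RLBase.Observation{Any}(), :player_1)
s  = RLBase.state(env, RLBase.Observation{Any}(), :player_1)
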


## action space implementations ====================================================================================

RLBase.action_space(env::PettingzooEnv, players::Tuple{String}) =
RLBase.action_space(env::PettingZooEnv, players::Tuple{Symbol}) =
Space(Dict(p => action_space(env, p) for p in players))

RLBase.action_space(env::PettingzooEnv, player::String) = space_transform(env.pyenv.action_space(player))
RLBase.action_space(env::PettingZooEnv, player::Symbol) = space_transform(env.pyenv.action_space(String(player)))

RLBase.action_space(env::PettingzooEnv, player::Integer) = space_transform(env.pyenv.action_space(env.pyenv.agents[player]))
RLBase.action_space(env::PettingZooEnv, player::Integer) = space_transform(env.pyenv.action_space(env.pyenv.agents[player]))

RLBase.action_space(env::PettingzooEnv, player::DefaultPlayer) = env.action_space
RLBase.action_space(env::PettingZooEnv, player::DefaultPlayer) = env.action_space
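
The overloads above allow querying an agent's action space by `Symbol` name, by integer index into `pyenv.agents`, or via the default player; a sketch (agent name illustrative):

# equivalent queries for the first agent's action space
as_by_name  = RLBase.action_space(env, :player_1)
as_by_index = RLBase.action_space(env, 1)
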

## action functions ========================================================================================================================

function RLBase.act!(env::PettingzooEnv, actions::Dict, players::Tuple)
@assert length(actions) == length(players)
env.ts += 1
for p in players
env(actions[p])
function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Int})
@assert length(actions) == length(players(env))
for p in env.pyenv.agents
pycall(env.pyenv.step, PyObject, actions[Symbol(p)])  # agents are Python strings; the Dict is keyed by Symbol
end
end

function RLBase.act!(env::PettingzooEnv, actions::Dict, player)
@assert length(actions) == length(players(env))
for p in players(env)
env(actions[p])
function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Real})
@assert length(actions) == length(env.pyenv.agents)
for p in env.pyenv.agents
pycall(env.pyenv.step, PyObject, np.array(actions[Symbol(p)]; dtype=np.float32))
end
end

function RLBase.act!(env::PettingzooEnv, actions::Dict{String, Int})
@assert length(actions) == length(players(env))
function RLBase.act!(env::PettingZooEnv, actions::Dict{Symbol, Vector})
@assert length(actions) == length(env.pyenv.agents)
for p in env.pyenv.agents
pycall(env.pyenv.step, PyObject, actions[p])
RLBase.act!(env, actions[Symbol(p)])  # delegate to the Vector method, which converts to a numpy array
end
end

function RLBase.act!(env::PettingzooEnv, actions::Dict{String, Real})
@assert length(actions) == length(players(env))
env.ts += 1
for p in env.pyenv.agents
pycall(env.pyenv.step, PyObject, np.array(actions[p]; dtype=np.float32))
function RLBase.act!(env::PettingZooEnv, actions::NamedTuple)
@assert length(actions) == length(env.pyenv.agents)
for player ∈ players(env)
RLBase.act!(env, actions[player])
end
end

function RLBase.act!(env::PettingzooEnv, action::Vector)
# for vectors, PettingZoo needs them converted to the proper numpy dtype
function RLBase.act!(env::PettingZooEnv, action::Vector)
pycall(env.pyenv.step, PyObject, np.array(action; dtype=np.float32))
end

function RLBase.act!(env::PettingzooEnv, action::Integer)
env.ts += 1
function RLBase.act!(env::PettingZooEnv, action)
pycall(env.pyenv.step, PyObject, action)
end
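
Taken together, these overloads step every agent in PettingZoo's fixed agent order from a single container of actions; a minimal sketch for the discrete case (two illustrative agent names, one action per agent):

# dispatches to the Dict{Symbol, Int} method above, one step per agent
actions = Dict(:player_1 => 1, :player_2 => 3)
RLBase.act!(env, actions)
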

# reward of player ======================================================================================================================
function RLBase.reward(env::PettingzooEnv, player::String)
env.pyenv.rewards[player]
function RLBase.reward(env::PettingZooEnv, player::Symbol)
env.pyenv.rewards[String(player)]
end


# Multi agent part =========================================================================================================================================


RLBase.players(env::PettingzooEnv) = env.pyenv.agents
RLBase.players(env::PettingZooEnv) = Symbol.(env.pyenv.agents)

function RLBase.current_player(env::PettingZooEnv)
return Symbol(env.pyenv.agents[env.current_player])
end

function RLBase.current_player(env::PettingzooEnv, post_action=false)
cur_id = env.ts % length(env.pyenv.agents) == 0 ? length(env.pyenv.agents) : env.ts % length(env.pyenv.agents)
cur_id = post_action ? (cur_id - 1 == 0 ? length(env.pyenv.agents) : cur_id - 1) : cur_id
return env.pyenv.agents[cur_id]
function RLBase.next_player!(env::PettingZooEnv)
env.current_player = env.current_player < length(env.pyenv.agents) ? env.current_player + 1 : 1
end
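
`current_player` maps the wrapper's integer cursor back to a PettingZoo agent name, and `next_player!` advances it round-robin; a sketch of one full cycle in a hypothetical two-agent environment:

# with two agents the cursor cycles 1 -> 2 -> 1
p1 = RLBase.current_player(env)   # e.g. :player_1
RLBase.next_player!(env)
p2 = RLBase.current_player(env)   # e.g. :player_2
RLBase.next_player!(env)          # wraps back to the first agent
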

function RLBase.NumAgentStyle(env::PettingzooEnv)
function RLBase.NumAgentStyle(env::PettingZooEnv)
n = length(env.pyenv.agents)
if n == 1
SingleAgent()
@@ -146,9 +146,8 @@ function RLBase.NumAgentStyle(env::PettingzooEnv)
end
end


RLBase.DynamicStyle(::PettingzooEnv) = SEQUENTIAL
RLBase.ActionStyle(::PettingzooEnv) = MINIMAL_ACTION_SET
RLBase.InformationStyle(::PettingzooEnv) = IMPERFECT_INFORMATION
RLBase.ChanceStyle(::PettingzooEnv) = EXPLICIT_STOCHASTIC
RLBase.DynamicStyle(::PettingZooEnv) = SIMULTANEOUS
RLBase.ActionStyle(::PettingZooEnv) = MINIMAL_ACTION_SET
RLBase.InformationStyle(::PettingZooEnv) = IMPERFECT_INFORMATION
RLBase.ChanceStyle(::PettingZooEnv) = EXPLICIT_STOCHASTIC
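
Putting the pieces together, a hedged end-to-end sketch of driving the wrapper (the environment name `mpe.simple_spread_v3` is only illustrative, action masking is ignored, and it is assumed the transformed spaces support `rand`):

# construct, reset, and step with one random action per agent until termination
env = PettingZooEnv("mpe.simple_spread_v3")
RLBase.reset!(env)
while !RLBase.is_terminated(env)
    acts = Dict(p => rand(RLBase.action_space(env, p)) for p in RLBase.players(env))
    RLBase.act!(env, acts)
end
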

@@ -1,11 +1,17 @@
mutable struct PettingzooEnv{Ta,To,P} <: AbstractEnv
pyenv::P
observation_space::To
action_space::Ta
state::P
seed::Union{Int, Nothing}
ts::Int
# Parametrization:
# Ta : Type of action_space
# To : Type of observation_space
# P  : Type of the wrapped environment; most commonly PyObject

mutable struct PettingZooEnv{Ta,To,P} <: AbstractEnv
pyenv::P
observation_space::To
action_space::Ta
state::P
seed::Union{Int, Nothing}
current_player::Int
end
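
A sketch of how the constructor in the first file fills these fields; the tail of that call is truncated in the diff above, so the state buffer, seed value, and initial player index shown here are assumptions:

# assumed field order, mirroring PettingZooEnv{Ta,To,P}
env = PettingZooEnv{typeof(act_space), typeof(obs_space), typeof(pyenv)}(
    pyenv,       # P:  the wrapped python environment
    obs_space,   # To: transformed observation space
    act_space,   # Ta: transformed action space
    PyNULL(),    # state buffer, filled in place by pycall! on reset
    123,         # seed
    1,           # current_player index into pyenv.agents
)
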

export PettingzooEnv

struct GymEnv{T,Ta,To,P} <: AbstractEnv
Expand Down