DLR-RM
diff --git a/‎docs/misc/changelog.rst‎
Lines changed: 25 additions & 0 deletions b/‎docs/misc/changelog.rst‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎stable_baselines3/common/atari_wrappers.py‎
Lines changed: 2 additions & 2 deletions b/‎stable_baselines3/common/atari_wrappers.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎stable_baselines3/common/base_class.py‎
Lines changed: 2 additions & 2 deletions b/‎stable_baselines3/common/base_class.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎stable_baselines3/common/buffers.py‎
Lines changed: 6 additions & 2 deletions b/‎stable_baselines3/common/buffers.py‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎stable_baselines3/common/callbacks.py‎
Lines changed: 6 additions & 5 deletions b/‎stable_baselines3/common/callbacks.py‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎stable_baselines3/common/cmd_util.py‎
Lines changed: 3 additions & 3 deletions b/‎stable_baselines3/common/cmd_util.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎stable_baselines3/common/distributions.py‎
Lines changed: 2 additions & 2 deletions b/‎stable_baselines3/common/distributions.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎stable_baselines3/common/logger.py‎
Lines changed: 1 addition & 1 deletion b/‎stable_baselines3/common/logger.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎stable_baselines3/common/monitor.py‎
Lines changed: 6 additions & 4 deletions b/‎stable_baselines3/common/monitor.py‎
Lines changed: 6 additions & 4 deletions
diff --git a/‎stable_baselines3/common/noise.py‎
Lines changed: 1 addition & 1 deletion b/‎stable_baselines3/common/noise.py‎
Lines changed: 1 addition & 1 deletion
@@ -3,6 +3,31 @@
 Changelog
 ==========
 
+
+Pre-Release 0.10.0a0 (WIP)
+------------------------------
+
+Breaking Changes:
+^^^^^^^^^^^^^^^^^
+
+New Features:
+^^^^^^^^^^^^^
+
+Bug Fixes:
+^^^^^^^^^^
+
+Deprecations:
+^^^^^^^^^^^^^
+
+Others:
+^^^^^^^
+- Improved typing coverage
+- Improved error messages for unsupported spaces
+
+Documentation:
+^^^^^^^^^^^^^^
+
+
 Pre-Release 0.9.0 (2020-10-03)
 ------------------------------
 
 
@@ -9,7 +9,7 @@
 except ImportError:
     cv2 = None
 
-from stable_baselines3.common.type_aliases import GymStepReturn
+from stable_baselines3.common.type_aliases import GymObs, GymStepReturn
 
 
 class NoopResetEnv(gym.Wrapper):
@@ -146,7 +146,7 @@ def step(self, action: int) -> GymStepReturn:
 
         return max_frame, total_reward, done, info
 
-    def reset(self, **kwargs):
+    def reset(self, **kwargs) -> GymObs:
         return self.env.reset(**kwargs)
 
 
 
@@ -478,7 +478,7 @@ def set_parameters(
         load_path_or_dict: Union[str, Dict[str, Dict]],
         exact_match: bool = True,
         device: Union[th.device, str] = "auto",
-    ):
+    ) -> None:
         """
         Load parameters from a given zip-file or a nested dictionary containing parameters for
         different modules (see ``get_parameters``).
@@ -610,7 +610,7 @@ def load(
             model.policy.reset_noise()  # pytype: disable=attribute-error
         return model
 
-    def get_parameters(self):
+    def get_parameters(self) -> Dict[str, Dict]:
         """
         Return the parameters of the agent. This includes parameters from different networks, e.g.
         critics (value functions) and policies (pi functions).
 
@@ -1,4 +1,5 @@
 import warnings
+from abc import ABC, abstractmethod
 from typing import Generator, Optional, Union
 
 import numpy as np
@@ -16,7 +17,7 @@
 from stable_baselines3.common.vec_env import VecNormalize
 
 
-class BaseBuffer(object):
+class BaseBuffer(ABC):
     """
     Base class that represents a buffer (rollout or replay)
 
@@ -102,7 +103,10 @@ def sample(self, batch_size: int, env: Optional[VecNormalize] = None):
         batch_inds = np.random.randint(0, upper_bound, size=batch_size)
         return self._get_samples(batch_inds, env=env)
 
-    def _get_samples(self, batch_inds: np.ndarray, env: Optional[VecNormalize] = None):
+    @abstractmethod
+    def _get_samples(
+        self, batch_inds: np.ndarray, env: Optional[VecNormalize] = None
+    ) -> Union[ReplayBufferSamples, RolloutBufferSamples]:
         """
         :param batch_inds:
         :param env:
 
@@ -1,7 +1,7 @@
 import os
 import warnings
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union
 
 import gym
 import numpy as np
@@ -217,9 +217,10 @@ class CheckpointCallback(BaseCallback):
     :param save_freq:
     :param save_path: Path to the folder where the model will be saved.
     :param name_prefix: Common prefix to the saved models
+    :param verbose:
     """
 
-    def __init__(self, save_freq: int, save_path: str, name_prefix="rl_model", verbose=0):
+    def __init__(self, save_freq: int, save_path: str, name_prefix: str = "rl_model", verbose: int = 0):
         super(CheckpointCallback, self).__init__(verbose)
         self.save_freq = save_freq
         self.save_path = save_path
@@ -247,7 +248,7 @@ class ConvertCallback(BaseCallback):
     :param verbose:
     """
 
-    def __init__(self, callback, verbose=0):
+    def __init__(self, callback: Callable, verbose: int = 0):
         super(ConvertCallback, self).__init__(verbose)
         self.callback = callback
 
@@ -314,7 +315,7 @@ def __init__(
         self.evaluations_timesteps = []
         self.evaluations_length = []
 
-    def _init_callback(self):
+    def _init_callback(self) -> None:
         # Does not work in some corner cases, where the wrapper is not the same
         if not isinstance(self.training_env, type(self.eval_env)):
             warnings.warn("Training and eval env are not of the same type" f"{self.training_env} != {self.eval_env}")
@@ -450,7 +451,7 @@ def __init__(self, max_episodes: int, verbose: int = 0):
         self._total_max_episodes = max_episodes
         self.n_episodes = 0
 
-    def _init_callback(self):
+    def _init_callback(self) -> None:
         # At start set total max according to number of environments
         self._total_max_episodes = self.max_episodes * self.training_env.num_envs
 
 
@@ -6,7 +6,7 @@
 
 from stable_baselines3.common.atari_wrappers import AtariWrapper
 from stable_baselines3.common.monitor import Monitor
-from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
+from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecEnv
 
 
 def make_vec_env(
@@ -19,7 +19,7 @@ def make_vec_env(
     env_kwargs: Optional[Dict[str, Any]] = None,
     vec_env_cls: Optional[Type[Union[DummyVecEnv, SubprocVecEnv]]] = None,
     vec_env_kwargs: Optional[Dict[str, Any]] = None,
-):
+) -> VecEnv:
     """
     Create a wrapped, monitored ``VecEnv``.
     By default it uses a ``DummyVecEnv`` which is usually faster
@@ -85,7 +85,7 @@ def make_atari_env(
     env_kwargs: Optional[Dict[str, Any]] = None,
     vec_env_cls: Optional[Union[DummyVecEnv, SubprocVecEnv]] = None,
     vec_env_kwargs: Optional[Dict[str, Any]] = None,
-):
+) -> VecEnv:
     """
     Create a wrapped, monitored VecEnv for Atari.
     It is a wrapper around ``make_vec_env`` that includes common preprocessing for Atari games.
 
@@ -1,7 +1,7 @@
 """Probability distributions."""
 
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import gym
 import torch as th
@@ -19,7 +19,7 @@ def __init__(self):
         super(Distribution, self).__init__()
 
     @abstractmethod
-    def proba_distribution_net(self, *args, **kwargs):
+    def proba_distribution_net(self, *args, **kwargs) -> Union[nn.Module, Tuple[nn.Module, nn.Parameter]]:
         """Create the layers and parameters that represent the distribution.
 
         Subclasses must define this, but the arguments and return type vary between
 
@@ -50,7 +50,7 @@ class SeqWriter(object):
     sequence writer
     """
 
-    def write_sequence(self, sequence: List):
+    def write_sequence(self, sequence: List) -> None:
         """
         Write a sequence (an array) to file
 
 
@@ -5,12 +5,14 @@
 import os
 import time
 from glob import glob
-from typing import Any, Dict, List, Optional, Tuple
+from typing import List, Optional, Tuple, Union
 
 import gym
 import numpy as np
 import pandas
 
+from stable_baselines3.common.type_aliases import GymObs, GymStepReturn
+
 
 class Monitor(gym.Wrapper):
     """
@@ -62,7 +64,7 @@ def __init__(
         self.total_steps = 0
         self.current_reset_info = {}  # extra info about the current episode, that was passed in during reset()
 
-    def reset(self, **kwargs) -> np.ndarray:
+    def reset(self, **kwargs) -> GymObs:
         """
         Calls the Gym environment reset. Can only be called if the episode is over, or if allow_early_resets is True
 
@@ -83,7 +85,7 @@ def reset(self, **kwargs) -> np.ndarray:
             self.current_reset_info[key] = value
         return self.env.reset(**kwargs)
 
-    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict[Any, Any]]:
+    def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
         """
         Step the environment with the given action
 
@@ -112,7 +114,7 @@ def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict[Any, A
         self.total_steps += 1
         return observation, reward, done, info
 
-    def close(self):
+    def close(self) -> None:
         """
         Closes the environment
         """
 
@@ -139,7 +139,7 @@ def base_noise(self) -> ActionNoise:
         return self._base_noise
 
     @base_noise.setter
-    def base_noise(self, base_noise: ActionNoise):
+    def base_noise(self, base_noise: ActionNoise) -> None:
         if base_noise is None:
             raise ValueError("Expected base_noise to be an instance of ActionNoise, not None", ActionNoise)
         if not isinstance(base_noise, ActionNoise):