@@ -18,8 +18,8 @@ def __init__(self, env: gym.Env, noop_max: int = 30):
         Sample initial states by taking random number of no-ops on reset.
         No-op is assumed to be action 0.

-        :param env: (gym.Env) the environment to wrap
-        :param noop_max: (int) the maximum value of no-ops to run
+        :param env: the environment to wrap
+        :param noop_max: the maximum value of no-ops to run
         """
         gym.Wrapper.__init__(self, env)
         self.noop_max = noop_max
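The hunk above only shows __init__; for context, a minimal sketch of the reset behavior the docstring describes (a random number of no-op steps taken on reset) might look like the following. The helper name and the use of np.random here are illustrative assumptions, not taken from this diff, and the old gym API (reset returns obs, step returns four values) is assumed.

import numpy as np

def noop_reset_sketch(env, noop_max=30, noop_action=0, **kwargs):
    # Reset, then take a random number of no-ops to randomize the start state.
    obs = env.reset(**kwargs)
    for _ in range(np.random.randint(1, noop_max + 1)):
        obs, _, done, _ = env.step(noop_action)
        if done:
            obs = env.reset(**kwargs)
    return obs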
@@ -47,7 +47,7 @@ def __init__(self, env: gym.Env):
4747 """
4848 Take action on reset for environments that are fixed until firing.
4949
50- :param env: (gym.Env) the environment to wrap
50+ :param env: the environment to wrap
5151 """
5252 gym .Wrapper .__init__ (self , env )
5353 assert env .unwrapped .get_action_meanings ()[1 ] == "FIRE"
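Given the FIRE assertion above, the wrapper's reset presumably presses FIRE once so that games which stay frozen until firing actually start. A rough sketch, where the helper name and the recovery-on-done logic are assumptions:

def fire_reset_sketch(env, **kwargs):
    # Action 1 is FIRE, as asserted by the wrapper's __init__.
    env.reset(**kwargs)
    obs, _, done, _ = env.step(1)
    if done:
        obs = env.reset(**kwargs)
    return obs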
@@ -70,7 +70,7 @@ def __init__(self, env: gym.Env):
         Make end-of-life == end-of-episode, but only reset on true game over.
         Done by DeepMind for the DQN and co. since it helps value estimation.

-        :param env: (gym.Env) the environment to wrap
+        :param env: the environment to wrap
         """
         gym.Wrapper.__init__(self, env)
         self.lives = 0
@@ -97,7 +97,7 @@ def reset(self, **kwargs) -> np.ndarray:
         and the learner need not know about any of this behind-the-scenes.

         :param kwargs: Extra keywords passed to env.reset() call
-        :return: (np.ndarray) the first observation of the environment
+        :return: the first observation of the environment
         """
         if self.was_real_done:
             obs = self.env.reset(**kwargs)
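The two hunks above cover the docstring and the start of reset(); the step-side bookkeeping that makes a lost life look like an episode end could be sketched as below. The helper name and the ale.lives() call are assumptions based on the standard ALE interface, not shown in this diff.

def episodic_life_step_sketch(env, action, prev_lives):
    # Report done=True to the learner when a life is lost, but remember whether
    # the game is truly over so reset() only does a full reset in that case.
    obs, reward, done, info = env.step(action)
    was_real_done = done
    lives = env.unwrapped.ale.lives()
    if 0 < lives < prev_lives:
        done = True
    return obs, reward, done, info, was_real_done, lives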
@@ -113,8 +113,8 @@ def __init__(self, env: gym.Env, skip: int = 4):
113113 """
114114 Return only every ``skip``-th frame (frameskipping)
115115
116- :param env: (gym.Env) the environment
117- :param skip: (int) number of ``skip``-th frame
116+ :param env: the environment
117+ :param skip: number of ``skip``-th frame
118118 """
119119 gym .Wrapper .__init__ (self , env )
120120 # most recent raw observations (for max pooling across time steps)
@@ -126,8 +126,8 @@ def step(self, action: int) -> GymStepReturn:
         Step the environment with the given action
         Repeat action, sum reward, and max over last observations.

-        :param action: ([int] or [float]) the action
-        :return: ([int] or [float], [float], [bool], dict) observation, reward, done, information
+        :param action: the action
+        :return: observation, reward, done, information
         """
         total_reward = 0.0
         done = None
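The step() hunk is cut off after its first two lines; a self-contained sketch of the skip-and-max-pool loop the docstring describes (repeat the action, sum the rewards, max over the last two raw frames) is shown below. The buffer handling and variable names are assumptions, not the diff's own code.

import numpy as np

def max_and_skip_step_sketch(env, action, skip=4):
    obs_buffer = np.zeros((2, *env.observation_space.shape), dtype=np.uint8)
    total_reward, done, info = 0.0, False, {}
    for i in range(skip):
        obs, reward, done, info = env.step(action)
        if i == skip - 2:
            obs_buffer[0] = obs   # second-to-last raw frame
        if i == skip - 1:
            obs_buffer[1] = obs   # last raw frame
        total_reward += reward
        if done:
            break
    # Max-pooling over the last two frames removes Atari sprite flickering.
    return obs_buffer.max(axis=0), total_reward, done, info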
@@ -155,16 +155,16 @@ def __init__(self, env: gym.Env):
155155 """
156156 Clips the reward to {+1, 0, -1} by its sign.
157157
158- :param env: (gym.Env) the environment
158+ :param env: the environment
159159 """
160160 gym .RewardWrapper .__init__ (self , env )
161161
162162 def reward (self , reward : float ) -> float :
163163 """
164164 Bin reward to {+1, 0, -1} by its sign.
165165
166- :param reward: (float)
167- :return: (float)
166+ :param reward:
167+ :return:
168168 """
169169 return np .sign (reward )
170170
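A quick illustration of the sign-based binning used by reward() above:

import numpy as np

print(np.sign([-250.0, 0.0, 7.0]))  # [-1.  0.  1.] -- the magnitude is discarded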
@@ -175,9 +175,9 @@ def __init__(self, env: gym.Env, width: int = 84, height: int = 84):
         Convert to grayscale and warp frames to 84x84 (default)
         as done in the Nature paper and later work.

-        :param env: (gym.Env) the environment
-        :param width: (int)
-        :param height: (int)
+        :param env: the environment
+        :param width: new frame width
+        :param height: new frame height
         """
         gym.ObservationWrapper.__init__(self, env)
         self.width = width
@@ -190,8 +190,8 @@ def observation(self, frame: np.ndarray) -> np.ndarray:
190190 """
191191 returns the current observation from a frame
192192
193- :param frame: (np.ndarray) environment frame
194- :return: (np.ndarray) the observation
193+ :param frame: environment frame
194+ :return: the observation
195195 """
196196 frame = cv2 .cvtColor (frame , cv2 .COLOR_RGB2GRAY )
197197 frame = cv2 .resize (frame , (self .width , self .height ), interpolation = cv2 .INTER_AREA )
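The two cv2 calls shown above do the actual warping; on a raw 210x160 RGB Atari frame the result is an 84x84 grayscale image. The frame shape below is a typical Atari resolution used only for illustration:

import cv2
import numpy as np

frame = np.zeros((210, 160, 3), dtype=np.uint8)                    # fake RGB frame
gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)                     # (210, 160)
warped = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)
print(warped.shape)                                                # (84, 84)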
@@ -212,13 +212,13 @@ class AtariWrapper(gym.Wrapper):
     * Grayscale observation
     * Clip reward to {-1, 0, 1}

-    :param env: (gym.Env) gym environment
-    :param noop_max: (int) : max number of no-ops
-    :param frame_skip: (int) : the frequency at which the agent experiences the game.
-    :param screen_size: (int) : resize Atari frame
-    :param terminal_on_life_loss: (bool) : if True, then step() returns done=True whenever a
+    :param env: gym environment
+    :param noop_max: max number of no-ops
+    :param frame_skip: the frequency at which the agent experiences the game.
+    :param screen_size: resize Atari frame
+    :param terminal_on_life_loss: if True, then step() returns done=True whenever a
         life is lost.
-    :param clip_reward: (bool) If True (default), the reward is clip to {-1, 0, 1} depending on its sign.
+    :param clip_reward: If True (default), the reward is clipped to {-1, 0, 1} depending on its sign.
     """

     def __init__(
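For reference, wrapping an environment with the parameters documented above might look like this. The environment id is only an example and assumes an Atari-capable gym installation; the keyword names come from the docstring in the hunk above.

import gym

env = AtariWrapper(
    gym.make("BreakoutNoFrameskip-v4"),
    noop_max=30,
    frame_skip=4,
    screen_size=84,
    terminal_on_life_loss=True,
    clip_reward=True,
)
obs = env.reset()                                     # warped grayscale observation
obs, reward, done, info = env.step(env.action_space.sample())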