Skip to content

Commit

Permalink
Add support for Gymnasium v1.1.0 (#2095)
Browse files Browse the repository at this point in the history
* Add support for Gymnasium v1.1.0

* Update tests to reflect new set_wrapper_attr behavior

* Fix mypy issues

* Update doc and changelog

---------

Co-authored-by: Antonin RAFFIN <[email protected]>
  • Loading branch information
pseudo-rnd-thoughts and araffin authored Mar 4, 2025
1 parent fa21bce commit 55d6f18
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 31 deletions.
3 changes: 3 additions & 0 deletions docs/guide/vec_envs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ Consider the following example for a custom env:
print(vec_env.env_method("get_wrapper_attr", "mu"))
# Change `mu` attribute via the setter
vec_env.env_method("set_mu", "mu", 0.1)
# If the variable exists, you can also use `set_wrapper_attr` to set it
assert vec_env.has_attr("mu")
vec_env.env_method("set_wrapper_attr", "mu", 0.1)
In this example ``env.mu`` cannot be accessed/changed directly because it is wrapped in a ``VecEnv`` and because it could be wrapped with other wrappers (see `GH#1573 <https://github.com/DLR-RM/stable-baselines3/issues/1573>`_ for a longer explanation).
Expand Down
6 changes: 5 additions & 1 deletion docs/misc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Changelog
==========

Release 2.6.0a1 (WIP)
Release 2.6.0a2 (WIP)
--------------------------


Expand All @@ -14,6 +14,7 @@ New Features:
^^^^^^^^^^^^^
- Added ``has_attr`` method for ``VecEnv`` to check if an attribute exists
- Added ``LogEveryNTimesteps`` callback to dump logs every N timesteps (note: you need to pass ``log_interval=None`` to avoid any interference)
- Added Gymnasium v1.1 support

Bug Fixes:
^^^^^^^^^^
Expand All @@ -36,11 +37,14 @@ Others:
^^^^^^^
- Updated black from v24 to v25
- Improved error messages when checking Box space equality (loading ``VecNormalize``)
- Updated tests to reflect the new ``set_wrapper_attr`` behavior (the attribute must already exist before it can be set)

Documentation:
^^^^^^^^^^^^^^
- Clarified the use of Gym wrappers with ``make_vec_env`` in the section on Vectorized Environments (@pstahlhofen)
- Updated callback doc for ``EveryNTimesteps``
- Added doc on how to set env attributes via ``VecEnv`` calls


Release 2.5.0 (2025-01-27)
--------------------------
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
packages=[package for package in find_packages() if package.startswith("stable_baselines3")],
package_data={"stable_baselines3": ["py.typed", "version.txt"]},
install_requires=[
"gymnasium>=0.29.1,<1.1.0",
"gymnasium>=0.29.1,<1.2.0",
"numpy>=1.20,<3.0",
"torch>=2.3,<3.0",
# For saving models
Expand Down
6 changes: 3 additions & 3 deletions stable_baselines3/common/off_policy_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ def dump_logs(self) -> None:
self.logger.record("time/time_elapsed", int(time_elapsed), exclude="tensorboard")
self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
if self.use_sde:
self.logger.record("train/std", (self.actor.get_std()).mean().item())
self.logger.record("train/std", (self.actor.get_std()).mean().item()) # type: ignore[operator]

if len(self.ep_success_buffer) > 0:
self.logger.record("rollout/success_rate", safe_mean(self.ep_success_buffer))
Expand Down Expand Up @@ -544,14 +544,14 @@ def collect_rollouts(
assert train_freq.unit == TrainFrequencyUnit.STEP, "You must use only one env when doing episodic training."

if self.use_sde:
self.actor.reset_noise(env.num_envs)
self.actor.reset_noise(env.num_envs) # type: ignore[operator]

callback.on_rollout_start()
continue_training = True
while should_collect_more_steps(train_freq, num_collected_steps, num_collected_episodes):
if self.use_sde and self.sde_sample_freq > 0 and num_collected_steps % self.sde_sample_freq == 0:
# Sample a new noise matrix
self.actor.reset_noise(env.num_envs)
self.actor.reset_noise(env.num_envs) # type: ignore[operator]

# Select action randomly or according to policy
actions, buffer_actions = self._sample_action(learning_starts, action_noise, env.num_envs)
Expand Down
1 change: 1 addition & 0 deletions stable_baselines3/sac/sac.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ def train(self, gradient_steps: int, batch_size: int = 64) -> None:
# so we don't change it with other losses
# see https://github.com/rail-berkeley/softlearning/issues/60
ent_coef = th.exp(self.log_ent_coef.detach())
assert isinstance(self.target_entropy, float)
ent_coef_loss = -(self.log_ent_coef * (log_prob + self.target_entropy).detach()).mean()
ent_coef_losses.append(ent_coef_loss.item())
else:
Expand Down
2 changes: 1 addition & 1 deletion stable_baselines3/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.6.0a1
2.6.0a2
56 changes: 31 additions & 25 deletions tests/test_vec_envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ def test_vecenv_custom_calls(vec_env_class, vec_env_wrapper):
"""Test access to methods/attributes of vectorized environments"""

def make_env():
return CustomGymEnv(spaces.Box(low=np.zeros(2), high=np.ones(2)))
# Wrap the env to check that get_attr and set_attr are working properly
return Monitor(CustomGymEnv(spaces.Box(low=np.zeros(2), high=np.ones(2))))

vec_env = vec_env_class([make_env for _ in range(N_ENVS)])

Expand Down Expand Up @@ -133,21 +134,23 @@ def make_env():

assert not vec_env.has_attr("dummy2")
# Set the value on the original env
# Note: doesn't work anymore with gym >= 1.1,
# the attribute needs to exist beforehand
# `set_wrapper_attr` doesn't exist before v1.0
if gym.__version__ > "1":
vec_env.env_method("set_wrapper_attr", "dummy2", 2)
assert vec_env.get_attr("dummy2") == [2] * N_ENVS
if vec_env_class == DummyVecEnv:
assert vec_env.envs[0].unwrapped.dummy2 == 2
# if vec_env_class == DummyVecEnv:
# assert vec_env.envs[0].unwrapped.dummy2 == 2

env_method_results = vec_env.env_method("custom_method", 1, indices=None, dim_1=2)
setattr_results = []
# Set current_step to an arbitrary value
# Set new variable dummy1 of the last wrapper to an arbitrary value
for env_idx in range(N_ENVS):
setattr_results.append(vec_env.set_attr("current_step", env_idx, indices=env_idx))
setattr_results.append(vec_env.set_attr("dummy1", env_idx, indices=env_idx))
# Retrieve the value for each environment
assert vec_env.has_attr("current_step")
getattr_results = vec_env.get_attr("current_step")
assert vec_env.has_attr("dummy1")
getattr_results = vec_env.get_attr("dummy1")

assert len(env_method_results) == N_ENVS
assert len(setattr_results) == N_ENVS
Expand All @@ -165,28 +168,31 @@ def make_env():
assert len(env_method_subset) == 2

# Test to change value for all the environments
setattr_result = vec_env.set_attr("current_step", 42, indices=None)
getattr_result = vec_env.get_attr("current_step")
setattr_result = vec_env.set_attr("dummy1", 42, indices=None)
getattr_result = vec_env.get_attr("dummy1")
assert setattr_result is None
assert getattr_result == [42 for _ in range(N_ENVS)]

# Additional tests for setattr that does not affect all the environments
vec_env.reset()
setattr_result = vec_env.set_attr("current_step", 12, indices=[0, 1])
getattr_result = vec_env.get_attr("current_step")
getattr_result_subset = vec_env.get_attr("current_step", indices=[0, 1])
assert setattr_result is None
assert getattr_result == [12 for _ in range(2)] + [0 for _ in range(N_ENVS - 2)]
assert getattr_result_subset == [12, 12]
assert vec_env.get_attr("current_step", indices=[0, 2]) == [12, 0]

vec_env.reset()
# Change value only for first and last environment
setattr_result = vec_env.set_attr("current_step", 12, indices=[0, -1])
getattr_result = vec_env.get_attr("current_step")
assert setattr_result is None
assert getattr_result == [12] + [0 for _ in range(N_ENVS - 2)] + [12]
assert vec_env.get_attr("current_step", indices=[-1]) == [12]
# Since gym >= 0.29, set_attr only sets the attribute on the last wrapper
# but `set_wrapper_attr` doesn't exist before v1.0
if gym.__version__ > "1":
setattr_result = vec_env.env_method("set_wrapper_attr", "current_step", 12, indices=[0, 1])
getattr_result = vec_env.get_attr("current_step")
getattr_result_subset = vec_env.get_attr("current_step", indices=[0, 1])
assert setattr_result == [True, True]
assert getattr_result == [12 for _ in range(2)] + [0 for _ in range(N_ENVS - 2)]
assert getattr_result_subset == [12, 12]
assert vec_env.get_attr("current_step", indices=[0, 2]) == [12, 0]

vec_env.reset()
# Change value only for first and last environment
setattr_result = vec_env.env_method("set_wrapper_attr", "current_step", 12, indices=[0, -1])
getattr_result = vec_env.get_attr("current_step")
assert setattr_result == [True, True]
assert getattr_result == [12] + [0 for _ in range(N_ENVS - 2)] + [12]
assert vec_env.get_attr("current_step", indices=[-1]) == [12]

# Checks that options are correctly passed
assert vec_env.get_attr("current_options")[0] is None
Expand Down Expand Up @@ -281,7 +287,7 @@ def test_vecenv_terminal_obs(vec_env_class, vec_env_wrapper):
("discrete", spaces.Discrete(2)),
("multidiscrete", spaces.MultiDiscrete([2, 3])),
("multibinary", spaces.MultiBinary(3)),
("continuous", spaces.Box(low=np.zeros(2), high=np.ones(2))),
("continuous", spaces.Box(low=np.zeros(2, dtype=np.float32), high=np.ones(2, dtype=np.float32))),
]
)

Expand Down

0 comments on commit 55d6f18

Please sign in to comment.