Add support for Gymnasium v1.1.0 (#2095)

* Add support for Gymnasium v1.1.0
* Update tests to reflect new set_wrapper_attr behavior
* Fix mypy issues
* Update doc and changelog

---------

Co-authored-by: Antonin RAFFIN <[email protected]>
DLR-RM · Mar 4, 2025 · 55d6f18 · 55d6f18
1 parent fa21bce
commit 55d6f18
Show file tree

Hide file tree

Showing 7 changed files with 45 additions and 31 deletions.
diff --git a/docs/guide/vec_envs.rst b/docs/guide/vec_envs.rst
@@ -148,6 +148,9 @@ Consider the following example for a custom env:
 	print(vec_env.env_method("get_wrapper_attr", "mu"))
 	# Change `mu` attribute via the setter
 	vec_env.env_method("set_mu", "mu", 0.1)
+	# If the variable exists, you can also use `set_wrapper_attr` to set it
+	assert vec_env.has_attr("mu")
+	vec_env.env_method("set_wrapper_attr", "mu", 0.1)
 
 
 In this example ``env.mu`` cannot be accessed/changed directly because it is wrapped in a ``VecEnv`` and because it could be wrapped with other wrappers (see `GH#1573 <https://github.com/DLR-RM/stable-baselines3/issues/1573>`_ for a longer explanation).

diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -3,7 +3,7 @@
 Changelog
 ==========
 
-Release 2.6.0a1 (WIP)
+Release 2.6.0a2 (WIP)
 --------------------------
 
 
@@ -14,6 +14,7 @@ New Features:
 ^^^^^^^^^^^^^
 - Added ``has_attr`` method for ``VecEnv`` to check if an attribute exists
 - Added ``LogEveryNTimesteps`` callback to dump logs every N timesteps (note: you need to pass ``log_interval=None`` to avoid any interference)
+- Added Gymnasium v1.1 support
 
 Bug Fixes:
 ^^^^^^^^^^
@@ -36,11 +37,14 @@ Others:
 ^^^^^^^
 - Updated black from v24 to v25
 - Improved error messages when checking Box space equality (loading ``VecNormalize``)
+- Updated tests to reflect the new ``set_wrapper_attr`` behavior
 
 Documentation:
 ^^^^^^^^^^^^^^
 - Clarify the use of Gym wrappers with ``make_vec_env`` in the section on Vectorized Environments (@pstahlhofen)
 - Updated callback doc for ``EveryNTimesteps``
+- Added doc on how to set env attributes via ``VecEnv`` calls
+
 
 Release 2.5.0 (2025-01-27)
 --------------------------

diff --git a/setup.py b/setup.py
@@ -76,7 +76,7 @@
     packages=[package for package in find_packages() if package.startswith("stable_baselines3")],
     package_data={"stable_baselines3": ["py.typed", "version.txt"]},
     install_requires=[
-        "gymnasium>=0.29.1,<1.1.0",
+        "gymnasium>=0.29.1,<1.2.0",
         "numpy>=1.20,<3.0",
         "torch>=2.3,<3.0",
         # For saving models

diff --git a/stable_baselines3/common/off_policy_algorithm.py b/stable_baselines3/common/off_policy_algorithm.py
@@ -423,7 +423,7 @@ def dump_logs(self) -> None:
         self.logger.record("time/time_elapsed", int(time_elapsed), exclude="tensorboard")
         self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
         if self.use_sde:
-            self.logger.record("train/std", (self.actor.get_std()).mean().item())
+            self.logger.record("train/std", (self.actor.get_std()).mean().item())  # type: ignore[operator]
 
         if len(self.ep_success_buffer) > 0:
             self.logger.record("rollout/success_rate", safe_mean(self.ep_success_buffer))
@@ -544,14 +544,14 @@ def collect_rollouts(
             assert train_freq.unit == TrainFrequencyUnit.STEP, "You must use only one env when doing episodic training."
 
         if self.use_sde:
-            self.actor.reset_noise(env.num_envs)
+            self.actor.reset_noise(env.num_envs)  # type: ignore[operator]
 
         callback.on_rollout_start()
         continue_training = True
         while should_collect_more_steps(train_freq, num_collected_steps, num_collected_episodes):
             if self.use_sde and self.sde_sample_freq > 0 and num_collected_steps % self.sde_sample_freq == 0:
                 # Sample a new noise matrix
-                self.actor.reset_noise(env.num_envs)
+                self.actor.reset_noise(env.num_envs)  # type: ignore[operator]
 
             # Select action randomly or according to policy
             actions, buffer_actions = self._sample_action(learning_starts, action_noise, env.num_envs)

diff --git a/stable_baselines3/sac/sac.py b/stable_baselines3/sac/sac.py
@@ -228,6 +228,7 @@ def train(self, gradient_steps: int, batch_size: int = 64) -> None:
                 # so we don't change it with other losses
                 # see https://github.com/rail-berkeley/softlearning/issues/60
                 ent_coef = th.exp(self.log_ent_coef.detach())
+                assert isinstance(self.target_entropy, float)
                 ent_coef_loss = -(self.log_ent_coef * (log_prob + self.target_entropy).detach()).mean()
                 ent_coef_losses.append(ent_coef_loss.item())
             else:

diff --git a/stable_baselines3/version.txt b/stable_baselines3/version.txt
@@ -1 +1 @@
-2.6.0a1
+2.6.0a2
diff --git a/tests/test_vec_envs.py b/tests/test_vec_envs.py
@@ -97,7 +97,8 @@ def test_vecenv_custom_calls(vec_env_class, vec_env_wrapper):
     """Test access to methods/attributes of vectorized environments"""
 
     def make_env():
-        return CustomGymEnv(spaces.Box(low=np.zeros(2), high=np.ones(2)))
+        # Wrap the env to check that get_attr and set_attr are working properly
+        return Monitor(CustomGymEnv(spaces.Box(low=np.zeros(2), high=np.ones(2))))
 
     vec_env = vec_env_class([make_env for _ in range(N_ENVS)])
 
@@ -133,21 +134,23 @@ def make_env():
 
     assert not vec_env.has_attr("dummy2")
     # Set the value on the original env
+    # Note: this doesn't work anymore with gym >= 1.1,
+    # the value needs to exist beforehand
     # `set_wrapper_attr` doesn't exist before v1.0
     if gym.__version__ > "1":
         vec_env.env_method("set_wrapper_attr", "dummy2", 2)
         assert vec_env.get_attr("dummy2") == [2] * N_ENVS
-        if vec_env_class == DummyVecEnv:
-            assert vec_env.envs[0].unwrapped.dummy2 == 2
+        # if vec_env_class == DummyVecEnv:
+        #     assert vec_env.envs[0].unwrapped.dummy2 == 2
 
     env_method_results = vec_env.env_method("custom_method", 1, indices=None, dim_1=2)
     setattr_results = []
-    # Set current_step to an arbitrary value
+    # Set new variable dummy1 of the last wrapper to an arbitrary value
     for env_idx in range(N_ENVS):
-        setattr_results.append(vec_env.set_attr("current_step", env_idx, indices=env_idx))
+        setattr_results.append(vec_env.set_attr("dummy1", env_idx, indices=env_idx))
     # Retrieve the value for each environment
-    assert vec_env.has_attr("current_step")
-    getattr_results = vec_env.get_attr("current_step")
+    assert vec_env.has_attr("dummy1")
+    getattr_results = vec_env.get_attr("dummy1")
 
     assert len(env_method_results) == N_ENVS
     assert len(setattr_results) == N_ENVS
@@ -165,28 +168,31 @@ def make_env():
     assert len(env_method_subset) == 2
 
     # Test to change value for all the environments
-    setattr_result = vec_env.set_attr("current_step", 42, indices=None)
-    getattr_result = vec_env.get_attr("current_step")
+    setattr_result = vec_env.set_attr("dummy1", 42, indices=None)
+    getattr_result = vec_env.get_attr("dummy1")
     assert setattr_result is None
     assert getattr_result == [42 for _ in range(N_ENVS)]
 
     # Additional tests for setattr that does not affect all the environments
     vec_env.reset()
-    setattr_result = vec_env.set_attr("current_step", 12, indices=[0, 1])
-    getattr_result = vec_env.get_attr("current_step")
-    getattr_result_subset = vec_env.get_attr("current_step", indices=[0, 1])
-    assert setattr_result is None
-    assert getattr_result == [12 for _ in range(2)] + [0 for _ in range(N_ENVS - 2)]
-    assert getattr_result_subset == [12, 12]
-    assert vec_env.get_attr("current_step", indices=[0, 2]) == [12, 0]
-
-    vec_env.reset()
-    # Change value only for first and last environment
-    setattr_result = vec_env.set_attr("current_step", 12, indices=[0, -1])
-    getattr_result = vec_env.get_attr("current_step")
-    assert setattr_result is None
-    assert getattr_result == [12] + [0 for _ in range(N_ENVS - 2)] + [12]
-    assert vec_env.get_attr("current_step", indices=[-1]) == [12]
+    # Since gym >= 0.29, set_attr only sets the attribute on the last wrapper
+    # but `set_wrapper_attr` doesn't exist before v1.0
+    if gym.__version__ > "1":
+        setattr_result = vec_env.env_method("set_wrapper_attr", "current_step", 12, indices=[0, 1])
+        getattr_result = vec_env.get_attr("current_step")
+        getattr_result_subset = vec_env.get_attr("current_step", indices=[0, 1])
+        assert setattr_result == [True, True]
+        assert getattr_result == [12 for _ in range(2)] + [0 for _ in range(N_ENVS - 2)]
+        assert getattr_result_subset == [12, 12]
+        assert vec_env.get_attr("current_step", indices=[0, 2]) == [12, 0]
+
+        vec_env.reset()
+        # Change value only for first and last environment
+        setattr_result = vec_env.env_method("set_wrapper_attr", "current_step", 12, indices=[0, -1])
+        getattr_result = vec_env.get_attr("current_step")
+        assert setattr_result == [True, True]
+        assert getattr_result == [12] + [0 for _ in range(N_ENVS - 2)] + [12]
+        assert vec_env.get_attr("current_step", indices=[-1]) == [12]
 
     # Checks that options are correctly passed
     assert vec_env.get_attr("current_options")[0] is None
@@ -281,7 +287,7 @@ def test_vecenv_terminal_obs(vec_env_class, vec_env_wrapper):
         ("discrete", spaces.Discrete(2)),
         ("multidiscrete", spaces.MultiDiscrete([2, 3])),
         ("multibinary", spaces.MultiBinary(3)),
-        ("continuous", spaces.Box(low=np.zeros(2), high=np.ones(2))),
+        ("continuous", spaces.Box(low=np.zeros(2, dtype=np.float32), high=np.ones(2, dtype=np.float32))),
     ]
 )