From 111e54591ab84babac6f168cb3819eb844daacbb Mon Sep 17 00:00:00 2001 From: ClemensSchwarke Date: Wed, 16 Jul 2025 17:52:37 +0200 Subject: [PATCH 01/12] changes for rsl_rl 3.0.0 --- .../reinforcement_learning/rsl_rl/cli_args.py | 8 +- scripts/reinforcement_learning/rsl_rl/play.py | 37 +++++---- .../reinforcement_learning/rsl_rl/train.py | 14 ++-- .../isaaclab_rl/rsl_rl/distillation_cfg.py | 12 +++ .../isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py | 78 +++++++++++++++---- .../isaaclab_rl/rsl_rl/symmetry_cfg.py | 6 +- .../isaaclab_rl/rsl_rl/vecenv_wrapper.py | 57 ++++---------- source/isaaclab_rl/setup.py | 2 +- .../allegro_hand/agents/rsl_rl_ppo_cfg.py | 3 +- .../direct/ant/agents/rsl_rl_ppo_cfg.py | 3 +- .../direct/anymal_c/agents/rsl_rl_ppo_cfg.py | 6 +- .../direct/cartpole/agents/rsl_rl_ppo_cfg.py | 3 +- .../franka_cabinet/agents/rsl_rl_ppo_cfg.py | 3 +- .../direct/humanoid/agents/rsl_rl_ppo_cfg.py | 3 +- .../quadcopter/agents/rsl_rl_ppo_cfg.py | 3 +- .../shadow_hand/agents/rsl_rl_ppo_cfg.py | 9 ++- .../classic/ant/agents/rsl_rl_ppo_cfg.py | 3 +- .../classic/cartpole/agents/rsl_rl_ppo_cfg.py | 3 +- .../classic/humanoid/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/digit/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/a1/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/anymal_b/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/anymal_c/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/anymal_d/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/cassie/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/digit/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/g1/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/go1/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/go2/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/h1/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/spot/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/franka/agents/rsl_rl_ppo_cfg.py | 3 +- .../allegro_hand/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/franka/agents/rsl_rl_ppo_cfg.py | 3 +- .../config/franka/agents/rsl_rl_ppo_cfg.py | 4 +- .../config/ur_10/agents/rsl_rl_ppo_cfg.py | 4 +- .../config/anymal_c/agents/rsl_rl_ppo_cfg.py | 3 +- .../template/templates/agents/rsl_rl_ppo_cfg | 3 +- 38 files changed, 196 insertions(+), 119 deletions(-) diff --git a/scripts/reinforcement_learning/rsl_rl/cli_args.py b/scripts/reinforcement_learning/rsl_rl/cli_args.py index df7e5f0ff8b2..c176f774515c 100644 --- a/scripts/reinforcement_learning/rsl_rl/cli_args.py +++ b/scripts/reinforcement_learning/rsl_rl/cli_args.py @@ -10,7 +10,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg + from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg def add_rsl_rl_args(parser: argparse.ArgumentParser): @@ -39,7 +39,7 @@ def add_rsl_rl_args(parser: argparse.ArgumentParser): ) -def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlOnPolicyRunnerCfg: +def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlBaseRunnerCfg: """Parse configuration for RSL-RL agent based on inputs. Args: @@ -52,12 +52,12 @@ def parse_rsl_rl_cfg(task_name: str, args_cli: argparse.Namespace) -> RslRlOnPol from isaaclab_tasks.utils.parse_cfg import load_cfg_from_registry # load the default configuration - rslrl_cfg: RslRlOnPolicyRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point") + rslrl_cfg: RslRlBaseRunnerCfg = load_cfg_from_registry(task_name, "rsl_rl_cfg_entry_point") rslrl_cfg = update_rsl_rl_cfg(rslrl_cfg, args_cli) return rslrl_cfg -def update_rsl_rl_cfg(agent_cfg: RslRlOnPolicyRunnerCfg, args_cli: argparse.Namespace): +def update_rsl_rl_cfg(agent_cfg: RslRlBaseRunnerCfg, args_cli: argparse.Namespace): """Update configuration for RSL-RL agent based on inputs. Args: diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py index 150bbd034927..e502bf70c30d 100644 --- a/scripts/reinforcement_learning/rsl_rl/play.py +++ b/scripts/reinforcement_learning/rsl_rl/play.py @@ -58,7 +58,7 @@ import time import torch -from rsl_rl.runners import OnPolicyRunner +from rsl_rl.runners import DistillationRunner, OnPolicyRunner from isaaclab.envs import ( DirectMARLEnv, @@ -71,7 +71,7 @@ from isaaclab.utils.dict import print_dict from isaaclab.utils.pretrained_checkpoint import get_published_pretrained_checkpoint -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx +from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper, export_policy_as_jit, export_policy_as_onnx import isaaclab_tasks # noqa: F401 from isaaclab_tasks.utils import get_checkpoint_path @@ -88,7 +88,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen train_task_name = task_name.replace("-Play", "") # override configurations with non-hydra CLI arguments - agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) + agent_cfg: RslRlBaseRunnerCfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) env_cfg.scene.num_envs = args_cli.num_envs if args_cli.num_envs is not None else env_cfg.scene.num_envs # set the environment seed @@ -136,32 +136,43 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen print(f"[INFO]: Loading model checkpoint from: {resume_path}") # load previously trained model - ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) - ppo_runner.load(resume_path) + if agent_cfg.class_name == "OnPolicyRunner": + runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + elif agent_cfg.class_name == "DistillationRunner": + runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device) + else: + raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") + runner.load(resume_path) # obtain the trained policy for inference - policy = ppo_runner.get_inference_policy(device=env.unwrapped.device) + policy = runner.get_inference_policy(device=env.unwrapped.device) # extract the neural network module # we do this in a try-except to maintain backwards compatibility. try: # version 2.3 onwards - policy_nn = ppo_runner.alg.policy + policy_nn = runner.alg.policy except AttributeError: # version 2.2 and below - policy_nn = ppo_runner.alg.actor_critic + policy_nn = runner.alg.actor_critic + + # extract the normalizer + if hasattr(policy_nn, "actor_obs_normalizer"): + normalizer = policy_nn.actor_obs_normalizer + elif hasattr(policy_nn, "student_obs_normalizer"): + normalizer = policy_nn.student_obs_normalizer + else: + normalizer = None # export policy to onnx/jit export_model_dir = os.path.join(os.path.dirname(resume_path), "exported") - export_policy_as_jit(policy_nn, ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.pt") - export_policy_as_onnx( - policy_nn, normalizer=ppo_runner.obs_normalizer, path=export_model_dir, filename="policy.onnx" - ) + export_policy_as_jit(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.pt") + export_policy_as_onnx(policy_nn, normalizer=normalizer, path=export_model_dir, filename="policy.onnx") dt = env.unwrapped.step_dt # reset environment - obs, _ = env.get_observations() + obs = env.get_observations() timestep = 0 # simulate environment while simulation_app.is_running(): diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index ff6ed50c333f..0a003bf55200 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -15,7 +15,6 @@ # local imports import cli_args # isort: skip - # add argparse arguments parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") parser.add_argument("--video", action="store_true", default=False, help="Record videos during training.") @@ -79,7 +78,7 @@ from datetime import datetime import omni -from rsl_rl.runners import OnPolicyRunner +from rsl_rl.runners import DistillationRunner, OnPolicyRunner from isaaclab.envs import ( DirectMARLEnv, @@ -91,7 +90,7 @@ from isaaclab.utils.dict import print_dict from isaaclab.utils.io import dump_pickle, dump_yaml -from isaaclab_rl.rsl_rl import RslRlOnPolicyRunnerCfg, RslRlVecEnvWrapper +from isaaclab_rl.rsl_rl import RslRlBaseRunnerCfg, RslRlVecEnvWrapper import isaaclab_tasks # noqa: F401 from isaaclab_tasks.utils import get_checkpoint_path @@ -106,7 +105,7 @@ @hydra_task_config(args_cli.task, args_cli.agent) -def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg): +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): """Train with RSL-RL agent.""" # override configurations with non-hydra CLI arguments agent_cfg = cli_args.update_rsl_rl_cfg(agent_cfg, args_cli) @@ -178,7 +177,12 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen env = RslRlVecEnvWrapper(env, clip_actions=agent_cfg.clip_actions) # create runner from rsl-rl - runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + if agent_cfg.class_name == "OnPolicyRunner": + runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + elif agent_cfg.class_name == "DistillationRunner": + runner = DistillationRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device) + else: + raise ValueError(f"Unsupported runner class: {agent_cfg.class_name}") # write git state to logs runner.add_git_repo_to_log(__file__) # load the checkpoint diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py index 3571511c3661..9be53e662951 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py @@ -28,6 +28,12 @@ class RslRlDistillationStudentTeacherCfg: noise_std_type: Literal["scalar", "log"] = "scalar" """The type of noise standard deviation for the policy. Default is scalar.""" + student_obs_normalization: bool = False + """Whether to normalize the observation for the student network. Default is False.""" + + teacher_obs_normalization: bool = False + """Whether to normalize the observation for the teacher network. Default is False.""" + student_hidden_dims: list[int] = MISSING """The hidden dimensions of the student network.""" @@ -81,3 +87,9 @@ class RslRlDistillationAlgorithmCfg: max_grad_norm: None | float = None """The maximum norm the gradient is clipped to.""" + + optimizer: Literal["adam", "adamw", "sgd", "rmsprop"] = "adam" + """The optimizer to use for the student policy.""" + + loss_type: Literal["mse", "huber"] = "mse" + """The loss type to use for the student policy.""" diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py index 81a00b1e7a6b..2698acb21f6b 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py @@ -32,6 +32,12 @@ class RslRlPpoActorCriticCfg: noise_std_type: Literal["scalar", "log"] = "scalar" """The type of noise standard deviation for the policy. Default is scalar.""" + actor_obs_normalization: bool = False + """Whether to normalize the observation for the actor network. Default is False.""" + + critic_obs_normalization: bool = False + """Whether to normalize the observation for the critic network. Default is False.""" + actor_hidden_dims: list[int] = MISSING """The hidden dimensions of the actor network.""" @@ -114,14 +120,12 @@ class RslRlPpoAlgorithmCfg: Otherwise, the advantage is normalized over the entire collected trajectories. """ + rnd_cfg: RslRlRndCfg | None = None + """The RND configuration. Default is None, in which case RND is not used.""" + symmetry_cfg: RslRlSymmetryCfg | None = None """The symmetry configuration. Default is None, in which case symmetry is not used.""" - rnd_cfg: RslRlRndCfg | None = None - """The configuration for the Random Network Distillation (RND) module. Default is None, - in which case RND is not used. - """ - ######################### # Runner configurations # @@ -129,8 +133,8 @@ class RslRlPpoAlgorithmCfg: @configclass -class RslRlOnPolicyRunnerCfg: - """Configuration of the runner for on-policy algorithms.""" +class RslRlBaseRunnerCfg: + """Base configuration of the runner.""" seed: int = 42 """The seed for the experiment. Default is 42.""" @@ -144,17 +148,30 @@ class RslRlOnPolicyRunnerCfg: max_iterations: int = MISSING """The maximum number of iterations.""" - empirical_normalization: bool = MISSING - """Whether to use empirical normalization.""" + obs_groups: dict[str, list[str]] = MISSING + """A mapping from observation groups to observation sets. - policy: RslRlPpoActorCriticCfg | RslRlDistillationStudentTeacherCfg = MISSING - """The policy configuration.""" + The keys of the dictionary are predefined observation sets used by the underlying algorithm + and values are lists of observation groups provided by the environment. - algorithm: RslRlPpoAlgorithmCfg | RslRlDistillationAlgorithmCfg = MISSING - """The algorithm configuration.""" + For instance, if the environment provides a dictionary of observations with groups "policy", "images", + and "privileged", these can be mapped to algorithmic observation sets as follows: + + .. code-block:: python + + obs_groups = { + "policy": ["policy", "images"], + "critic": ["policy", "privileged"], + } + + This way, the policy will receive the "policy" and "images" observations, and the critic will + receive the "policy" and "privileged" observations. + + For more details, please check ``vec_env.py`` in the rsl_rl library. + """ clip_actions: float | None = None - """The clipping value for actions. If ``None``, then no clipping is done. + """The clipping value for actions. If None, then no clipping is done. Defaults to None. .. note:: This clipping is performed inside the :class:`RslRlVecEnvWrapper` wrapper. @@ -184,7 +201,10 @@ class RslRlOnPolicyRunnerCfg: """The wandb project name. Default is "isaaclab".""" resume: bool = False - """Whether to resume. Default is False.""" + """Whether to resume a previous training. Default is False. + + This flag will be ignored for distillation. + """ load_run: str = ".*" """The run directory to load. Default is ".*" (all). @@ -197,3 +217,31 @@ class RslRlOnPolicyRunnerCfg: If regex expression, the latest (alphabetical order) matching file will be loaded. """ + + +@configclass +class RslRlOnPolicyRunnerCfg(RslRlBaseRunnerCfg): + """Configuration of the runner for on-policy algorithms.""" + + class_name: str = "OnPolicyRunner" + """The runner class name. Default is OnPolicyRunner.""" + + policy: RslRlPpoActorCriticCfg = MISSING + """The policy configuration.""" + + algorithm: RslRlPpoAlgorithmCfg = MISSING + """The algorithm configuration.""" + + +@configclass +class RslRlDistillationRunnerCfg(RslRlBaseRunnerCfg): + """Configuration of the runner for distillation algorithms.""" + + class_name: str = "DistillationRunner" + """The runner class name. Default is DistillationRunner.""" + + policy: RslRlDistillationStudentTeacherCfg = MISSING + """The policy configuration.""" + + algorithm: RslRlDistillationAlgorithmCfg = MISSING + """The algorithm configuration.""" diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/symmetry_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/symmetry_cfg.py index bf0ecc9a829c..0cd476e848db 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/symmetry_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/symmetry_cfg.py @@ -39,13 +39,11 @@ class RslRlSymmetryCfg: Args: env (VecEnv): The environment object. This is used to access the environment's properties. - obs (torch.Tensor | None): The observation tensor. If None, the observation is not used. + obs (tensordict.TensorDict | None): The observation tensor dictionary. If None, the observation is not used. action (torch.Tensor | None): The action tensor. If None, the action is not used. - obs_type (str): The name of the observation type. Defaults to "policy". - This is useful when handling augmentation for different observation groups. Returns: - A tuple containing the augmented observation and action tensors. The tensors can be None, + A tuple containing the augmented observation dictionary and action tensors. The tensors can be None, if their respective inputs are None. """ diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index d909bf2d9128..304831f8b302 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -5,6 +5,7 @@ import gymnasium as gym import torch +from tensordict import TensorDict from rsl_rl.env import VecEnv @@ -12,16 +13,9 @@ class RslRlVecEnvWrapper(VecEnv): - """Wraps around Isaac Lab environment for RSL-RL library - - To use asymmetric actor-critic, the environment instance must have the attributes :attr:`num_privileged_obs` (int). - This is used by the learning agent to allocate buffers in the trajectory memory. Additionally, the returned - observations should have the key "critic" which corresponds to the privileged observations. Since this is - optional for some environments, the wrapper checks if these attributes exist. If they don't then the wrapper - defaults to zero as number of privileged observations. + """Wraps around Isaac Lab environment for the RSL-RL library .. caution:: - This class must be the last wrapper in the wrapper chain. This is because the wrapper does not follow the :class:`gym.Wrapper` interface. Any subsequent wrappers will need to be modified to work with this wrapper. @@ -43,12 +37,14 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N Raises: ValueError: When the environment is not an instance of :class:`ManagerBasedRLEnv` or :class:`DirectRLEnv`. """ + # check that input is valid if not isinstance(env.unwrapped, ManagerBasedRLEnv) and not isinstance(env.unwrapped, DirectRLEnv): raise ValueError( "The environment must be inherited from ManagerBasedRLEnv or DirectRLEnv. Environment type:" f" {type(env)}" ) + # initialize the wrapper self.env = env self.clip_actions = clip_actions @@ -63,20 +59,6 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N self.num_actions = self.unwrapped.action_manager.total_action_dim else: self.num_actions = gym.spaces.flatdim(self.unwrapped.single_action_space) - if hasattr(self.unwrapped, "observation_manager"): - self.num_obs = self.unwrapped.observation_manager.group_obs_dim["policy"][0] - else: - self.num_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["policy"]) - # -- privileged observations - if ( - hasattr(self.unwrapped, "observation_manager") - and "critic" in self.unwrapped.observation_manager.group_obs_dim - ): - self.num_privileged_obs = self.unwrapped.observation_manager.group_obs_dim["critic"][0] - elif hasattr(self.unwrapped, "num_states") and "critic" in self.unwrapped.single_observation_space: - self.num_privileged_obs = gym.spaces.flatdim(self.unwrapped.single_observation_space["critic"]) - else: - self.num_privileged_obs = 0 # modify the action space to the clip range self._modify_action_space() @@ -133,14 +115,6 @@ def unwrapped(self) -> ManagerBasedRLEnv | DirectRLEnv: Properties """ - def get_observations(self) -> tuple[torch.Tensor, dict]: - """Returns the current observations of the environment.""" - if hasattr(self.unwrapped, "observation_manager"): - obs_dict = self.unwrapped.observation_manager.compute() - else: - obs_dict = self.unwrapped._get_observations() - return obs_dict["policy"], {"observations": obs_dict} - @property def episode_length_buf(self) -> torch.Tensor: """The episode length buffer.""" @@ -162,13 +136,15 @@ def episode_length_buf(self, value: torch.Tensor): def seed(self, seed: int = -1) -> int: # noqa: D102 return self.unwrapped.seed(seed) - def reset(self) -> tuple[torch.Tensor, dict]: # noqa: D102 - # reset the environment - obs_dict, _ = self.env.reset() - # return observations - return obs_dict["policy"], {"observations": obs_dict} + def get_observations(self) -> TensorDict: + """Returns the current observations of the environment.""" + if hasattr(self.unwrapped, "observation_manager"): + obs_dict = self.unwrapped.observation_manager.compute() + else: + obs_dict = self.unwrapped._get_observations() + return TensorDict(obs_dict, batch_size=[self.num_envs]) - def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]: + def step(self, actions: torch.Tensor) -> tuple[TensorDict, torch.Tensor, torch.Tensor, dict]: # clip actions if self.clip_actions is not None: actions = torch.clamp(actions, -self.clip_actions, self.clip_actions) @@ -176,16 +152,12 @@ def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch obs_dict, rew, terminated, truncated, extras = self.env.step(actions) # compute dones for compatibility with RSL-RL dones = (terminated | truncated).to(dtype=torch.long) - # move extra observations to the extras dict - obs = obs_dict["policy"] - extras["observations"] = obs_dict # move time out information to the extras dict # this is only needed for infinite horizon tasks if not self.unwrapped.cfg.is_finite_horizon: extras["time_outs"] = truncated - # return the step information - return obs, rew, dones, extras + return TensorDict(obs_dict, batch_size=[self.num_envs]), rew, dones, extras def close(self): # noqa: D102 return self.env.close() @@ -200,7 +172,8 @@ def _modify_action_space(self): return # modify the action space to the clip range - # note: this is only possible for the box action space. we need to change it in the future for other action spaces. + # note: this is only possible for the box action space. we need to change it in the future for other + # action spaces. self.env.unwrapped.single_action_space = gym.spaces.Box( low=-self.clip_actions, high=self.clip_actions, shape=(self.num_actions,) ) diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py index 706f2b529cd2..3d4c3c7378a1 100644 --- a/source/isaaclab_rl/setup.py +++ b/source/isaaclab_rl/setup.py @@ -45,7 +45,7 @@ "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11", "gym", ], # rl-games still needs gym :( - "rsl-rl": ["rsl-rl-lib==2.3.3"], + "rsl-rl": ["rsl-rl-lib==3.0.0"], } # Add the names with hyphens as aliases for convenience EXTRAS_REQUIRE["rl_games"] = EXTRAS_REQUIRE["rl-games"] diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/rsl_rl_ppo_cfg.py index 6dd5f3c99f2d..8da27d1a7e00 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AllegroHandPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 10000 save_interval = 250 experiment_name = "allegro_hand" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[1024, 512, 256, 128], critic_hidden_dims=[1024, 512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/rsl_rl_ppo_cfg.py index 38b42ea08cbd..5ea9520fec2c 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AntPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1000 save_interval = 50 experiment_name = "ant_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[400, 200, 100], critic_hidden_dims=[400, 200, 100], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/rsl_rl_ppo_cfg.py index 5c11cde53d2e..efdf7d4f991a 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AnymalCFlatPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 500 save_interval = 50 experiment_name = "anymal_c_flat_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[128, 128, 128], critic_hidden_dims=[128, 128, 128], activation="elu", @@ -43,9 +44,10 @@ class AnymalCRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "anymal_c_rough_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/rsl_rl_ppo_cfg.py index 81f77fcbd7ac..1cadf22d48c0 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class CartpolePPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 150 save_interval = 50 experiment_name = "cartpole_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[32, 32], critic_hidden_dims=[32, 32], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/rsl_rl_ppo_cfg.py index 797777f90056..74788e7b220c 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class FrankaCabinetPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "franka_cabinet_direct" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[256, 128, 64], critic_hidden_dims=[256, 128, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/rsl_rl_ppo_cfg.py index ebbbdb6990cb..029629225092 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class HumanoidPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1000 save_interval = 50 experiment_name = "humanoid_direct" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[400, 200, 100], critic_hidden_dims=[400, 200, 100], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/rsl_rl_ppo_cfg.py index dae0dee0bf5e..86b2c5508382 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class QuadcopterPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 200 save_interval = 50 experiment_name = "quadcopter_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[64, 64], critic_hidden_dims=[64, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rsl_rl_ppo_cfg.py index 524a799bae37..665c997e635d 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class ShadowHandPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 10000 save_interval = 250 experiment_name = "shadow_hand" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[512, 512, 256, 128], critic_hidden_dims=[512, 512, 256, 128], activation="elu", @@ -43,9 +44,10 @@ class ShadowHandAsymFFPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 10000 save_interval = 250 experiment_name = "shadow_hand_openai_ff" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[400, 400, 200, 100], critic_hidden_dims=[512, 512, 256, 128], activation="elu", @@ -72,9 +74,10 @@ class ShadowHandVisionFFPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 50000 save_interval = 250 experiment_name = "shadow_hand_vision" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[1024, 512, 512, 256, 128], critic_hidden_dims=[1024, 512, 512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/rsl_rl_ppo_cfg.py index 7d729795f163..5257b0508681 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AntPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1000 save_interval = 50 experiment_name = "ant" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[400, 200, 100], critic_hidden_dims=[400, 200, 100], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py index f80815b97e38..86ab5309c362 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/rsl_rl_ppo_cfg.py @@ -16,9 +16,10 @@ class CartpolePPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 150 save_interval = 50 experiment_name = "cartpole" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[32, 32], critic_hidden_dims=[32, 32], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py index ae44b8085a1d..663012f94f03 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class HumanoidPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1000 save_interval = 50 experiment_name = "humanoid" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[400, 200, 100], critic_hidden_dims=[400, 200, 100], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/rsl_rl_ppo_cfg.py index cb898b1e89c6..942a5230f1d7 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/loco_manipulation/tracking/config/digit/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class DigitLocoManipPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 2000 save_interval = 50 experiment_name = "digit_loco_manip" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[256, 128, 128], critic_hidden_dims=[256, 128, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/rsl_rl_ppo_cfg.py index 99c53ce9d7a7..db162f1228fc 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class UnitreeA1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "unitree_a1_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py index 7e89bf7acd4e..b92ccac2e794 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/rsl_rl_ppo_cfg.py @@ -16,9 +16,10 @@ class AnymalBRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "anymal_b_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py index aa620d940309..507f602c3c57 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/rsl_rl_ppo_cfg.py @@ -16,9 +16,10 @@ class AnymalCRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "anymal_c_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py index b1db4f60f8a4..c5b2c1c1848d 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/rsl_rl_ppo_cfg.py @@ -16,9 +16,10 @@ class AnymalDRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "anymal_d_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/rsl_rl_ppo_cfg.py index 9c57f001af14..719f8a241051 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class CassieRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "cassie_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/rsl_rl_ppo_cfg.py index ab23e2c7b71c..00be11a490f7 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/digit/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class DigitRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 3000 save_interval = 50 experiment_name = "digit_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/rsl_rl_ppo_cfg.py index 39e93c7dd9eb..946490165380 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class G1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 3000 save_interval = 50 experiment_name = "g1_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/rsl_rl_ppo_cfg.py index 47301907c398..5be515ccc0d6 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class UnitreeGo1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "unitree_go1_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/rsl_rl_ppo_cfg.py index caeafe6bc4a8..e0c6afab9ea6 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class UnitreeGo2RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "unitree_go2_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/rsl_rl_ppo_cfg.py index 39d80f892f25..1163ac744c46 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class H1RoughPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 3000 save_interval = 50 experiment_name = "h1_rough" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_ppo_cfg.py index 155864175c25..951fb421cfce 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/rsl_rl_ppo_cfg.py @@ -14,10 +14,11 @@ class SpotFlatPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 20000 save_interval = 50 experiment_name = "spot_flat" - empirical_normalization = False store_code_state = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/rsl_rl_ppo_cfg.py index 99a4730f8357..ee642fb07aa8 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class CabinetPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 400 save_interval = 50 experiment_name = "franka_open_drawer" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[256, 128, 64], critic_hidden_dims=[256, 128, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py index c3471f192036..4cbe6266f240 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class AllegroCubePPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 5000 save_interval = 50 experiment_name = "allegro_cube" - empirical_normalization = True policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=True, + critic_obs_normalization=True, actor_hidden_dims=[512, 256, 128], critic_hidden_dims=[512, 256, 128], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/rsl_rl_ppo_cfg.py index 3d519e926b4b..067425a74d48 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class LiftCubePPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "franka_lift" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[256, 128, 64], critic_hidden_dims=[256, 128, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py index 1b51d812d96c..24bea7c5ac14 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/rsl_rl_ppo_cfg.py @@ -15,10 +15,10 @@ class FrankaReachPPORunnerCfg(RslRlOnPolicyRunnerCfg): save_interval = 50 experiment_name = "franka_reach" run_name = "" - resume = False - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[64, 64], critic_hidden_dims=[64, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/rsl_rl_ppo_cfg.py index 287b4ec95f81..1b55830a64ea 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/rsl_rl_ppo_cfg.py @@ -15,10 +15,10 @@ class UR10ReachPPORunnerCfg(RslRlOnPolicyRunnerCfg): save_interval = 50 experiment_name = "reach_ur10" run_name = "" - resume = False - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[64, 64], critic_hidden_dims=[64, 64], activation="elu", diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/rsl_rl_ppo_cfg.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/rsl_rl_ppo_cfg.py index 1ea1a61dba05..4b23def89b2f 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/rsl_rl_ppo_cfg.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/rsl_rl_ppo_cfg.py @@ -14,9 +14,10 @@ class NavigationEnvPPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 1500 save_interval = 50 experiment_name = "anymal_c_navigation" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=0.5, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[128, 128], critic_hidden_dims=[128, 128], activation="elu", diff --git a/tools/template/templates/agents/rsl_rl_ppo_cfg b/tools/template/templates/agents/rsl_rl_ppo_cfg index eaeaf78bfc04..85970dfc2ce4 100644 --- a/tools/template/templates/agents/rsl_rl_ppo_cfg +++ b/tools/template/templates/agents/rsl_rl_ppo_cfg @@ -14,9 +14,10 @@ class PPORunnerCfg(RslRlOnPolicyRunnerCfg): max_iterations = 150 save_interval = 50 experiment_name = "cartpole_direct" - empirical_normalization = False policy = RslRlPpoActorCriticCfg( init_noise_std=1.0, + actor_obs_normalization=False, + critic_obs_normalization=False, actor_hidden_dims=[32, 32], critic_hidden_dims=[32, 32], activation="elu", From 10b4ee84035f2776a6d0550e288715d3ca6a7103 Mon Sep 17 00:00:00 2001 From: ClemensSchwarke Date: Fri, 18 Jul 2025 13:36:23 +0200 Subject: [PATCH 02/12] add emp_normalization parameter and force new rsl_rl version --- scripts/reinforcement_learning/rsl_rl/train.py | 4 ++-- source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index 0a003bf55200..e174cdfd84f7 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -56,9 +56,9 @@ from packaging import version # for distributed training, check minimum supported rsl-rl version -RSL_RL_VERSION = "2.3.1" +RSL_RL_VERSION = "3.0.0" installed_version = metadata.version("rsl-rl-lib") -if args_cli.distributed and version.parse(installed_version) < version.parse(RSL_RL_VERSION): +if version.parse(installed_version) < version.parse(RSL_RL_VERSION): if platform.system() == "Windows": cmd = [r".\isaaclab.bat", "-p", "-m", "pip", "install", f"rsl-rl-lib=={RSL_RL_VERSION}"] else: diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py index 2698acb21f6b..df3e9d564f55 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py @@ -148,6 +148,12 @@ class RslRlBaseRunnerCfg: max_iterations: int = MISSING """The maximum number of iterations.""" + empirical_normalization: bool = None + """This parameter is deprecated and will be removed in the future. + + Use `actor_obs_normalization` and `critic_obs_normalization` instead. + """ + obs_groups: dict[str, list[str]] = MISSING """A mapping from observation groups to observation sets. From 9e3828966b2d062065507ccfa8190e608b57756f Mon Sep 17 00:00:00 2001 From: Clemens Schwarke <96480707+ClemensSchwarke@users.noreply.github.com> Date: Mon, 21 Jul 2025 10:34:57 +0200 Subject: [PATCH 03/12] Add type hint for emp normalization Co-authored-by: Pascal Roth <57946385+pascal-roth@users.noreply.github.com> Signed-off-by: Clemens Schwarke <96480707+ClemensSchwarke@users.noreply.github.com> --- source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py index df3e9d564f55..b3dc415cae4a 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py @@ -148,7 +148,7 @@ class RslRlBaseRunnerCfg: max_iterations: int = MISSING """The maximum number of iterations.""" - empirical_normalization: bool = None + empirical_normalization: bool | None = None """This parameter is deprecated and will be removed in the future. Use `actor_obs_normalization` and `critic_obs_normalization` instead. From 64fd32a3473bbee07f6e501fd9e6b070d504b895 Mon Sep 17 00:00:00 2001 From: Kelly Guo Date: Wed, 23 Jul 2025 00:04:33 -0400 Subject: [PATCH 04/12] Update play.py Signed-off-by: Kelly Guo --- scripts/reinforcement_learning/rsl_rl/play.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py index e502bf70c30d..9e89c6ff318f 100644 --- a/scripts/reinforcement_learning/rsl_rl/play.py +++ b/scripts/reinforcement_learning/rsl_rl/play.py @@ -81,7 +81,7 @@ @hydra_task_config(args_cli.task, args_cli.agent) -def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlOnPolicyRunnerCfg): +def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agent_cfg: RslRlBaseRunnerCfg): """Play with RSL-RL agent.""" # grab task name for checkpoint path task_name = args_cli.task.split(":")[-1] From 6cf41b57d9aae6184096edaf40c4450775ea8899 Mon Sep 17 00:00:00 2001 From: ClemensSchwarke Date: Mon, 28 Jul 2025 12:20:59 +0200 Subject: [PATCH 05/12] add reset method to vecenv wrapper and fix test --- source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py | 5 +++++ source/isaaclab_rl/test/test_rsl_rl_wrapper.py | 3 +++ 2 files changed, 8 insertions(+) diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index 304831f8b302..73ceae04693b 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -136,6 +136,11 @@ def episode_length_buf(self, value: torch.Tensor): def seed(self, seed: int = -1) -> int: # noqa: D102 return self.unwrapped.seed(seed) + def reset(self) -> tuple[TensorDict, dict]: # noqa: D102 + # reset the environment + obs_dict, extras = self.env.reset() + return TensorDict(obs_dict, batch_size=[self.num_envs]), extras + def get_observations(self) -> TensorDict: """Returns the current observations of the environment.""" if hasattr(self.unwrapped, "observation_manager"): diff --git a/source/isaaclab_rl/test/test_rsl_rl_wrapper.py b/source/isaaclab_rl/test/test_rsl_rl_wrapper.py index a88d4864fb20..4eaf921be85c 100644 --- a/source/isaaclab_rl/test/test_rsl_rl_wrapper.py +++ b/source/isaaclab_rl/test/test_rsl_rl_wrapper.py @@ -16,6 +16,7 @@ import gymnasium as gym import torch +from tensordict import TensorDict import carb import omni.usd @@ -161,6 +162,8 @@ def _check_valid_tensor(data: torch.Tensor | dict) -> bool: """ if isinstance(data, torch.Tensor): return not torch.any(torch.isnan(data)) + elif isinstance(data, TensorDict): + return not data.isnan().any() elif isinstance(data, dict): valid_tensor = True for value in data.values(): From ad4fecd2731939cbb4ebb71fbbac99105206d1c3 Mon Sep 17 00:00:00 2001 From: Octi Zhang Date: Mon, 28 Jul 2025 13:40:36 -0700 Subject: [PATCH 06/12] trigger CI From dda02b505f50319a39fd2ccdc0031355fbbdcc42 Mon Sep 17 00:00:00 2001 From: Octi Zhang Date: Mon, 25 Aug 2025 18:15:46 -0700 Subject: [PATCH 07/12] update pip packaging before running --install --- isaaclab.bat | 7 ++++++- isaaclab.sh | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/isaaclab.bat b/isaaclab.bat index 6923c9ee9174..c13d900987a8 100644 --- a/isaaclab.bat +++ b/isaaclab.bat @@ -332,6 +332,9 @@ if "%arg%"=="-i" ( rem install the python packages in isaaclab/source directory echo [INFO] Installing extensions inside the Isaac Lab repository... call :extract_python_exe + rem bootstrap pip (minimal) + call !python_exe! -m ensurepip --upgrade >nul 2>nul + call !python_exe! -m pip install -U "pip>=25.2" rem check if pytorch is installed and its version rem install pytorch with cuda 12.8 for blackwell support call !python_exe! -m pip list | findstr /C:"torch" >nul @@ -377,7 +380,9 @@ if "%arg%"=="-i" ( rem install the python packages in source directory echo [INFO] Installing extensions inside the Isaac Lab repository... call :extract_python_exe - + rem bootstrap pip (minimal) + call !python_exe! -m ensurepip --upgrade >nul 2>nul + call !python_exe! -m pip install -U "pip>=25.2" rem check if pytorch is installed and its version rem install pytorch with cuda 12.8 for blackwell support call !python_exe! -m pip list | findstr /C:"torch" >nul diff --git a/isaaclab.sh b/isaaclab.sh index fed536e680a4..d9e787c01de1 100755 --- a/isaaclab.sh +++ b/isaaclab.sh @@ -362,6 +362,9 @@ while [[ $# -gt 0 ]]; do # install the python packages in IsaacLab/source directory echo "[INFO] Installing extensions inside the Isaac Lab repository..." python_exe=$(extract_python_exe) + # bootstrap_pip + ${python_exe} -m ensurepip --upgrade || true + ${python_exe} -m pip install -U "pip>=25.2" # check if pytorch is installed and its version # install pytorch with cuda 12.8 for blackwell support if ${python_exe} -m pip list 2>/dev/null | grep -q "torch"; then From b0a649581db195ef806b80e78cc30fc33798cfeb Mon Sep 17 00:00:00 2001 From: Octi Zhang Date: Fri, 29 Aug 2025 16:41:15 -0700 Subject: [PATCH 08/12] constraining packaging version rather than force upgrading pip --- isaaclab.bat | 6 ------ isaaclab.sh | 3 --- source/isaaclab_rl/setup.py | 1 + 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/isaaclab.bat b/isaaclab.bat index c13d900987a8..5780f5d83064 100644 --- a/isaaclab.bat +++ b/isaaclab.bat @@ -332,9 +332,6 @@ if "%arg%"=="-i" ( rem install the python packages in isaaclab/source directory echo [INFO] Installing extensions inside the Isaac Lab repository... call :extract_python_exe - rem bootstrap pip (minimal) - call !python_exe! -m ensurepip --upgrade >nul 2>nul - call !python_exe! -m pip install -U "pip>=25.2" rem check if pytorch is installed and its version rem install pytorch with cuda 12.8 for blackwell support call !python_exe! -m pip list | findstr /C:"torch" >nul @@ -380,9 +377,6 @@ if "%arg%"=="-i" ( rem install the python packages in source directory echo [INFO] Installing extensions inside the Isaac Lab repository... call :extract_python_exe - rem bootstrap pip (minimal) - call !python_exe! -m ensurepip --upgrade >nul 2>nul - call !python_exe! -m pip install -U "pip>=25.2" rem check if pytorch is installed and its version rem install pytorch with cuda 12.8 for blackwell support call !python_exe! -m pip list | findstr /C:"torch" >nul diff --git a/isaaclab.sh b/isaaclab.sh index d9e787c01de1..fed536e680a4 100755 --- a/isaaclab.sh +++ b/isaaclab.sh @@ -362,9 +362,6 @@ while [[ $# -gt 0 ]]; do # install the python packages in IsaacLab/source directory echo "[INFO] Installing extensions inside the Isaac Lab repository..." python_exe=$(extract_python_exe) - # bootstrap_pip - ${python_exe} -m ensurepip --upgrade || true - ${python_exe} -m pip install -U "pip>=25.2" # check if pytorch is installed and its version # install pytorch with cuda 12.8 for blackwell support if ${python_exe} -m pip list 2>/dev/null | grep -q "torch"; then diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py index 3d4c3c7378a1..4e99e34d500a 100644 --- a/source/isaaclab_rl/setup.py +++ b/source/isaaclab_rl/setup.py @@ -33,6 +33,7 @@ "moviepy", # make sure this is consistent with isaac sim version "pillow==11.2.1", + "packaging<24", ] PYTORCH_INDEX_URL = ["https://download.pytorch.org/whl/cu128"] From 1be11a17f94e776c7a37d5ac18cfe046b14e7ed6 Mon Sep 17 00:00:00 2001 From: Kelly Guo Date: Fri, 29 Aug 2025 17:15:01 -0700 Subject: [PATCH 09/12] fix license --- .github/workflows/license-exceptions.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/license-exceptions.json b/.github/workflows/license-exceptions.json index 6beb8dab54b2..231e5e470e8f 100644 --- a/.github/workflows/license-exceptions.json +++ b/.github/workflows/license-exceptions.json @@ -390,5 +390,10 @@ "package": "ml_dtypes", "license" : "UNKNOWN", "comment": "Apache 2.0" + }, + { + "package": "zipp", + "license" : "UNKNOWN", + "comment": "MIT" } ] From 902435f5e7fe688bee479246c5830fa2a6db0ed6 Mon Sep 17 00:00:00 2001 From: ClemensSchwarke Date: Mon, 1 Sep 2025 11:24:28 +0200 Subject: [PATCH 10/12] set obs norm default to missing --- source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py | 8 ++++---- source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py index 9be53e662951..d4153d5cf2b0 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/distillation_cfg.py @@ -28,11 +28,11 @@ class RslRlDistillationStudentTeacherCfg: noise_std_type: Literal["scalar", "log"] = "scalar" """The type of noise standard deviation for the policy. Default is scalar.""" - student_obs_normalization: bool = False - """Whether to normalize the observation for the student network. Default is False.""" + student_obs_normalization: bool = MISSING + """Whether to normalize the observation for the student network.""" - teacher_obs_normalization: bool = False - """Whether to normalize the observation for the teacher network. Default is False.""" + teacher_obs_normalization: bool = MISSING + """Whether to normalize the observation for the teacher network.""" student_hidden_dims: list[int] = MISSING """The hidden dimensions of the student network.""" diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py index b3dc415cae4a..90ef6c026652 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py @@ -32,11 +32,11 @@ class RslRlPpoActorCriticCfg: noise_std_type: Literal["scalar", "log"] = "scalar" """The type of noise standard deviation for the policy. Default is scalar.""" - actor_obs_normalization: bool = False - """Whether to normalize the observation for the actor network. Default is False.""" + actor_obs_normalization: bool = MISSING + """Whether to normalize the observation for the actor network.""" - critic_obs_normalization: bool = False - """Whether to normalize the observation for the critic network. Default is False.""" + critic_obs_normalization: bool = MISSING + """Whether to normalize the observation for the critic network.""" actor_hidden_dims: list[int] = MISSING """The hidden dimensions of the actor network.""" From d602566e4c596c20ee8cd2f163813b301066b005 Mon Sep 17 00:00:00 2001 From: ClemensSchwarke Date: Mon, 1 Sep 2025 14:18:13 +0200 Subject: [PATCH 11/12] adapt symmetry scripts to new rsl rl version --- .../classic/cartpole/mdp/symmetry.py | 23 ++++--- .../velocity/mdp/symmetry/anymal.py | 61 ++++++++----------- 2 files changed, 36 insertions(+), 48 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py index 8b13bf7c017f..5bf81c900578 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/mdp/symmetry.py @@ -8,6 +8,7 @@ from __future__ import annotations import torch +from tensordict import TensorDict from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -20,9 +21,8 @@ @torch.no_grad() def compute_symmetric_states( env: ManagerBasedRLEnv, - obs: torch.Tensor | None = None, + obs: TensorDict | None = None, actions: torch.Tensor | None = None, - obs_type: str = "policy", ): """Augments the given observations and actions by applying symmetry transformations. @@ -33,9 +33,8 @@ def compute_symmetric_states( Args: env: The environment instance. - obs: The original observation tensor. Defaults to None. + obs: The original observation tensor dictionary. Defaults to None. actions: The original actions tensor. Defaults to None. - obs_type: The type of observation to augment. Defaults to "policy". Returns: Augmented observations and actions tensors, or None if the respective input was None. @@ -43,25 +42,25 @@ def compute_symmetric_states( # observations if obs is not None: - num_envs = obs.shape[0] + batch_size = obs.batch_size[0] # since we have 2 different symmetries, we need to augment the batch size by 2 - obs_aug = torch.zeros(num_envs * 2, obs.shape[1], device=obs.device) + obs_aug = obs.repeat(2) # -- original - obs_aug[:num_envs] = obs[:] + obs_aug["policy"][:batch_size] = obs["policy"][:] # -- left-right - obs_aug[num_envs : 2 * num_envs] = -obs + obs_aug["policy"][batch_size : 2 * batch_size] = -obs["policy"] else: obs_aug = None # actions if actions is not None: - num_envs = actions.shape[0] + batch_size = actions.shape[0] # since we have 4 different symmetries, we need to augment the batch size by 4 - actions_aug = torch.zeros(num_envs * 2, actions.shape[1], device=actions.device) + actions_aug = torch.zeros(batch_size * 2, actions.shape[1], device=actions.device) # -- original - actions_aug[:num_envs] = actions[:] + actions_aug[:batch_size] = actions[:] # -- left-right - actions_aug[num_envs : 2 * num_envs] = -actions + actions_aug[batch_size : 2 * batch_size] = -actions else: actions_aug = None diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py index 2a3f4564fb87..7d2db8fa7fff 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/mdp/symmetry/anymal.py @@ -9,6 +9,7 @@ from __future__ import annotations import torch +from tensordict import TensorDict from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -21,9 +22,8 @@ @torch.no_grad() def compute_symmetric_states( env: ManagerBasedRLEnv, - obs: torch.Tensor | None = None, + obs: TensorDict | None = None, actions: torch.Tensor | None = None, - obs_type: str = "policy", ): """Augments the given observations and actions by applying symmetry transformations. @@ -34,9 +34,8 @@ def compute_symmetric_states( Args: env: The environment instance. - obs: The original observation tensor. Defaults to None. + obs: The original observation tensor dictionary. Defaults to None. actions: The original actions tensor. Defaults to None. - obs_type: The type of observation to augment. Defaults to "policy". Returns: Augmented observations and actions tensors, or None if the respective input was None. @@ -44,33 +43,39 @@ def compute_symmetric_states( # observations if obs is not None: - num_envs = obs.shape[0] + batch_size = obs.batch_size[0] # since we have 4 different symmetries, we need to augment the batch size by 4 - obs_aug = torch.zeros(num_envs * 4, obs.shape[1], device=obs.device) + obs_aug = obs.repeat(4) + + # policy observation group # -- original - obs_aug[:num_envs] = obs[:] + obs_aug["policy"][:batch_size] = obs["policy"][:] # -- left-right - obs_aug[num_envs : 2 * num_envs] = _transform_obs_left_right(env.unwrapped, obs, obs_type) + obs_aug["policy"][batch_size : 2 * batch_size] = _transform_policy_obs_left_right(env.unwrapped, obs["policy"]) # -- front-back - obs_aug[2 * num_envs : 3 * num_envs] = _transform_obs_front_back(env.unwrapped, obs, obs_type) + obs_aug["policy"][2 * batch_size : 3 * batch_size] = _transform_policy_obs_front_back( + env.unwrapped, obs["policy"] + ) # -- diagonal - obs_aug[3 * num_envs :] = _transform_obs_front_back(env.unwrapped, obs_aug[num_envs : 2 * num_envs]) + obs_aug["policy"][3 * batch_size :] = _transform_policy_obs_front_back( + env.unwrapped, obs_aug["policy"][batch_size : 2 * batch_size] + ) else: obs_aug = None # actions if actions is not None: - num_envs = actions.shape[0] + batch_size = actions.shape[0] # since we have 4 different symmetries, we need to augment the batch size by 4 - actions_aug = torch.zeros(num_envs * 4, actions.shape[1], device=actions.device) + actions_aug = torch.zeros(batch_size * 4, actions.shape[1], device=actions.device) # -- original - actions_aug[:num_envs] = actions[:] + actions_aug[:batch_size] = actions[:] # -- left-right - actions_aug[num_envs : 2 * num_envs] = _transform_actions_left_right(actions) + actions_aug[batch_size : 2 * batch_size] = _transform_actions_left_right(actions) # -- front-back - actions_aug[2 * num_envs : 3 * num_envs] = _transform_actions_front_back(actions) + actions_aug[2 * batch_size : 3 * batch_size] = _transform_actions_front_back(actions) # -- diagonal - actions_aug[3 * num_envs :] = _transform_actions_front_back(actions_aug[num_envs : 2 * num_envs]) + actions_aug[3 * batch_size :] = _transform_actions_front_back(actions_aug[batch_size : 2 * batch_size]) else: actions_aug = None @@ -82,7 +87,7 @@ def compute_symmetric_states( """ -def _transform_obs_left_right(env: ManagerBasedRLEnv, obs: torch.Tensor, obs_type: str = "policy") -> torch.Tensor: +def _transform_policy_obs_left_right(env: ManagerBasedRLEnv, obs: torch.Tensor) -> torch.Tensor: """Apply a left-right symmetry transformation to the observation tensor. This function modifies the given observation tensor by applying transformations @@ -95,7 +100,6 @@ def _transform_obs_left_right(env: ManagerBasedRLEnv, obs: torch.Tensor, obs_typ Args: env: The environment instance from which the observation is obtained. obs: The observation tensor to be transformed. - obs_type: The type of observation to augment. Defaults to "policy". Returns: The transformed observation tensor with left-right symmetry applied. @@ -118,21 +122,14 @@ def _transform_obs_left_right(env: ManagerBasedRLEnv, obs: torch.Tensor, obs_typ # last actions obs[:, 36:48] = _switch_anymal_joints_left_right(obs[:, 36:48]) - # height-scan - if obs_type == "critic": - # handle asymmetric actor-critic formulation - group_name = "critic" if "critic" in env.observation_manager.active_terms else "policy" - else: - group_name = "policy" - # note: this is hard-coded for grid-pattern of ordering "xy" and size (1.6, 1.0) - if "height_scan" in env.observation_manager.active_terms[group_name]: + if "height_scan" in env.observation_manager.active_terms["policy"]: obs[:, 48:235] = obs[:, 48:235].view(-1, 11, 17).flip(dims=[1]).view(-1, 11 * 17) return obs -def _transform_obs_front_back(env: ManagerBasedRLEnv, obs: torch.Tensor, obs_type: str = "policy") -> torch.Tensor: +def _transform_policy_obs_front_back(env: ManagerBasedRLEnv, obs: torch.Tensor) -> torch.Tensor: """Applies a front-back symmetry transformation to the observation tensor. This function modifies the given observation tensor by applying transformations @@ -144,7 +141,6 @@ def _transform_obs_front_back(env: ManagerBasedRLEnv, obs: torch.Tensor, obs_typ Args: env: The environment instance from which the observation is obtained. obs: The observation tensor to be transformed. - obs_type: The type of observation to augment. Defaults to "policy". Returns: The transformed observation tensor with front-back symmetry applied. @@ -167,15 +163,8 @@ def _transform_obs_front_back(env: ManagerBasedRLEnv, obs: torch.Tensor, obs_typ # last actions obs[:, 36:48] = _switch_anymal_joints_front_back(obs[:, 36:48]) - # height-scan - if obs_type == "critic": - # handle asymmetric actor-critic formulation - group_name = "critic" if "critic" in env.observation_manager.active_terms else "policy" - else: - group_name = "policy" - # note: this is hard-coded for grid-pattern of ordering "xy" and size (1.6, 1.0) - if "height_scan" in env.observation_manager.active_terms[group_name]: + if "height_scan" in env.observation_manager.active_terms["policy"]: obs[:, 48:235] = obs[:, 48:235].view(-1, 11, 17).flip(dims=[2]).view(-1, 11 * 17) return obs From 4f2f734afd25da11aa682f2c57e6fedcb0aa5d20 Mon Sep 17 00:00:00 2001 From: ClemensSchwarke Date: Mon, 1 Sep 2025 15:11:25 +0200 Subject: [PATCH 12/12] update rsl_rl version --- scripts/reinforcement_learning/rsl_rl/train.py | 4 ++-- source/isaaclab_rl/setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index e174cdfd84f7..33bfc9f63d4a 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -55,8 +55,8 @@ from packaging import version -# for distributed training, check minimum supported rsl-rl version -RSL_RL_VERSION = "3.0.0" +# check minimum supported rsl-rl version +RSL_RL_VERSION = "3.0.1" installed_version = metadata.version("rsl-rl-lib") if version.parse(installed_version) < version.parse(RSL_RL_VERSION): if platform.system() == "Windows": diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py index 4e99e34d500a..f9ddcdb0fa50 100644 --- a/source/isaaclab_rl/setup.py +++ b/source/isaaclab_rl/setup.py @@ -46,7 +46,7 @@ "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11", "gym", ], # rl-games still needs gym :( - "rsl-rl": ["rsl-rl-lib==3.0.0"], + "rsl-rl": ["rsl-rl-lib==3.0.1"], } # Add the names with hyphens as aliases for convenience EXTRAS_REQUIRE["rl_games"] = EXTRAS_REQUIRE["rl-games"]