diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index 089ec7561979..14b9a1fdb1ae 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -46,15 +46,17 @@ "--ml_framework", type=str, default="torch", - choices=["torch", "jax", "jax-numpy"], + choices=["torch", "jax"], help="The ML framework used for training the skrl agent.", ) parser.add_argument( "--algorithm", type=str, default="PPO", - choices=["AMP", "PPO", "IPPO", "MAPPO"], - help="The RL algorithm used for training the skrl agent.", + help=( + "Name of the RL algorithm to use (e.g. AMP, DDPG, IPPO, MAPPO, PPO, SAC, TD3, etc.) " + "when several algorithms exist for the same task. For a more specific selection, use the argument --agent." + ), ) parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") @@ -84,7 +86,7 @@ from packaging import version # check for minimum supported skrl version -SKRL_VERSION = "1.4.3" +SKRL_VERSION = "2.0.0" if version.parse(skrl.__version__) < version.parse(SKRL_VERSION): skrl.logger.error( f"Unsupported skrl version: {skrl.__version__}. " @@ -207,10 +209,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, expe print(f"[INFO] Loading model checkpoint from: {resume_path}") runner.agent.load(resume_path) # set agent to evaluation mode - runner.agent.set_running_mode("eval") + runner.agent.enable_training_mode(False, apply_to_models=True) # reset environment obs, _ = env.reset() + states = env.state() timestep = 0 # simulate environment while simulation_app.is_running(): @@ -219,7 +222,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, expe # run everything in inference mode with torch.inference_mode(): # agent stepping - outputs = runner.agent.act(obs, timestep=0, timesteps=0) + outputs = runner.agent.act(obs, states, timestep=0, timesteps=0) # - multi-agent (deterministic) actions if hasattr(env, "possible_agents"): actions = {a: outputs[-1][a].get("mean_actions", outputs[0][a]) for a in env.possible_agents} @@ -228,6 +231,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, expe actions = outputs[-1].get("mean_actions", outputs[0]) # env stepping obs, _, _, _, _ = env.step(actions) + states = env.state() if args_cli.video: timestep += 1 # exit the play loop after recording one video diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index cf2edce47435..badb4144031b 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -44,15 +44,17 @@ "--ml_framework", type=str, default="torch", - choices=["torch", "jax", "jax-numpy"], + choices=["torch", "jax"], help="The ML framework used for training the skrl agent.", ) parser.add_argument( "--algorithm", type=str, default="PPO", - choices=["AMP", "PPO", "IPPO", "MAPPO"], - help="The RL algorithm used for training the skrl agent.", + help=( + "Name of the RL algorithm to use (e.g. AMP, DDPG, IPPO, MAPPO, PPO, SAC, TD3, etc.) " + "when several algorithms exist for the same task. For a more specific selection, use the argument --agent." + ), ) parser.add_argument( "--ray-proc-id", "-rid", type=int, default=None, help="Automatically configured by Ray integration, otherwise None." @@ -85,7 +87,7 @@ from packaging import version # check for minimum supported skrl version -SKRL_VERSION = "1.4.3" +SKRL_VERSION = "2.0.0" if version.parse(skrl.__version__) < version.parse(SKRL_VERSION): skrl.logger.error( f"Unsupported skrl version: {skrl.__version__}. " diff --git a/source/isaaclab_rl/config/extension.toml b/source/isaaclab_rl/config/extension.toml index 35ce26490606..6b5ae668f03e 100644 --- a/source/isaaclab_rl/config/extension.toml +++ b/source/isaaclab_rl/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.5.0" +version = "0.5.1" # Description title = "Isaac Lab RL" diff --git a/source/isaaclab_rl/docs/CHANGELOG.rst b/source/isaaclab_rl/docs/CHANGELOG.rst index 9666e7214e81..cf903b02d832 100644 --- a/source/isaaclab_rl/docs/CHANGELOG.rst +++ b/source/isaaclab_rl/docs/CHANGELOG.rst @@ -1,6 +1,14 @@ Changelog --------- +0.5.1 (2026-04-21) +~~~~~~~~~~~~~~~~~~ + +Changed +^^^^^^^ + +* Updated skrl wrapper to support the new version of skrl 2.0. + 0.5.0 (2026-3-04) ~~~~~~~~~~~~~~~~~~ diff --git a/source/isaaclab_rl/isaaclab_rl/skrl.py b/source/isaaclab_rl/isaaclab_rl/skrl.py index 5aba121523f2..b83a644831b1 100644 --- a/source/isaaclab_rl/isaaclab_rl/skrl.py +++ b/source/isaaclab_rl/isaaclab_rl/skrl.py @@ -38,7 +38,7 @@ def SkrlVecEnvWrapper( env: ManagerBasedRLEnv | DirectRLEnv | DirectMARLEnv, - ml_framework: Literal["torch", "jax", "jax-numpy"] = "torch", + ml_framework: Literal["torch", "jax", "warp"] = "torch", wrapper: Literal["auto", "isaaclab", "isaaclab-single-agent", "isaaclab-multi-agent"] = "isaaclab", ): """Wraps around Isaac Lab environment for skrl. @@ -77,9 +77,11 @@ def SkrlVecEnvWrapper( from skrl.envs.wrappers.torch import wrap_env elif ml_framework.startswith("jax"): from skrl.envs.wrappers.jax import wrap_env + elif ml_framework.startswith("warp"): + from skrl.envs.wrappers.warp import wrap_env else: - ValueError( - f"Invalid ML framework for skrl: {ml_framework}. Available options are: 'torch', 'jax' or 'jax-numpy'" + raise ValueError( + f"Invalid ML framework for skrl: {ml_framework}. Available options are: 'torch', 'jax', 'warp'" ) # wrap and return the environment diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py index f1cf55d3222c..d3b81f195893 100644 --- a/source/isaaclab_rl/setup.py +++ b/source/isaaclab_rl/setup.py @@ -41,7 +41,7 @@ # Extra dependencies for RL agents EXTRAS_REQUIRE = { "sb3": ["stable-baselines3>=2.6", "tqdm", "rich"], # tqdm/rich for progress bar - "skrl": ["skrl>=1.4.3"], + "skrl": ["skrl>=2.0.0"], "rl-games": [ "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11", "gym", diff --git a/source/isaaclab_tasks/config/extension.toml b/source/isaaclab_tasks/config/extension.toml index 80335e4cf123..6b1140818ee3 100644 --- a/source/isaaclab_tasks/config/extension.toml +++ b/source/isaaclab_tasks/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.11.14" +version = "0.11.16" # Description title = "Isaac Lab Environments" diff --git a/source/isaaclab_tasks/docs/CHANGELOG.rst b/source/isaaclab_tasks/docs/CHANGELOG.rst index 896ed0be4255..8bb095bc372a 100644 --- a/source/isaaclab_tasks/docs/CHANGELOG.rst +++ b/source/isaaclab_tasks/docs/CHANGELOG.rst @@ -1,6 +1,14 @@ Changelog --------- +0.11.16 (2026-04-21) +~~~~~~~~~~~~~~~~~~~~ + +Changed +^^^^^^^ + +* Updated some agents' configuration files for the skrl library to support the new version of skrl 2.0. + 0.11.15 (2026-03-07) ~~~~~~~~~~~~~~~~~~~~ diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml index f9298c9252ac..606d5634e92e 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml @@ -54,7 +54,9 @@ agent: learning_rate_scheduler: KLAdaptiveLR learning_rate_scheduler_kwargs: kl_threshold: 0.008 - state_preprocessor: RunningStandardScaler + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml index 8f192cf2988d..aef5480970a9 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: OBSERVATIONS + input: STATES layers: [32, 32] activations: elu output: ONE @@ -54,10 +54,10 @@ agent: learning_rate_scheduler: KLAdaptiveLR learning_rate_scheduler_kwargs: kl_threshold: 0.008 + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null state_preprocessor: RunningStandardScaler state_preprocessor_kwargs: null - shared_state_preprocessor: RunningStandardScaler - shared_state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null random_timesteps: 0 diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml index 3071d039b88c..437fa80c6058 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml @@ -75,12 +75,14 @@ agent: learning_rate: 5.0e-05 learning_rate_scheduler: null learning_rate_scheduler_kwargs: null - state_preprocessor: RunningStandardScaler + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null - amp_state_preprocessor: RunningStandardScaler - amp_state_preprocessor_kwargs: null + amp_observation_preprocessor: RunningStandardScaler + amp_observation_preprocessor_kwargs: null random_timesteps: 0 learning_starts: 0 grad_norm_clip: 0.0 @@ -91,10 +93,9 @@ agent: value_loss_scale: 2.5 discriminator_loss_scale: 5.0 amp_batch_size: 512 - task_reward_weight: 0.0 - style_reward_weight: 1.0 + task_reward_scale: 0.0 + style_reward_scale: 2.0 discriminator_batch_size: 4096 - discriminator_reward_scale: 2.0 discriminator_logit_regularization_scale: 0.05 discriminator_gradient_penalty_scale: 5.0 discriminator_weight_decay_scale: 1.0e-04 diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml index 0f6fcdc1a03f..fa3c590a9584 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml @@ -75,12 +75,14 @@ agent: learning_rate: 5.0e-05 learning_rate_scheduler: null learning_rate_scheduler_kwargs: null - state_preprocessor: RunningStandardScaler + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null - amp_state_preprocessor: RunningStandardScaler - amp_state_preprocessor_kwargs: null + amp_observation_preprocessor: RunningStandardScaler + amp_observation_preprocessor_kwargs: null random_timesteps: 0 learning_starts: 0 grad_norm_clip: 0.0 @@ -91,10 +93,9 @@ agent: value_loss_scale: 2.5 discriminator_loss_scale: 5.0 amp_batch_size: 512 - task_reward_weight: 0.0 - style_reward_weight: 1.0 + task_reward_scale: 0.0 + style_reward_scale: 2.0 discriminator_batch_size: 4096 - discriminator_reward_scale: 2.0 discriminator_logit_regularization_scale: 0.05 discriminator_gradient_penalty_scale: 5.0 discriminator_weight_decay_scale: 1.0e-04 diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml index efb34f0d2f5b..8b52c661d48b 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml @@ -75,12 +75,14 @@ agent: learning_rate: 5.0e-05 learning_rate_scheduler: null learning_rate_scheduler_kwargs: null - state_preprocessor: RunningStandardScaler + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null - amp_state_preprocessor: RunningStandardScaler - amp_state_preprocessor_kwargs: null + amp_observation_preprocessor: RunningStandardScaler + amp_observation_preprocessor_kwargs: null random_timesteps: 0 learning_starts: 0 grad_norm_clip: 0.0 @@ -91,10 +93,9 @@ agent: value_loss_scale: 2.5 discriminator_loss_scale: 5.0 amp_batch_size: 512 - task_reward_weight: 0.0 - style_reward_weight: 1.0 + task_reward_scale: 0.0 + style_reward_scale: 2.0 discriminator_batch_size: 4096 - discriminator_reward_scale: 2.0 discriminator_logit_regularization_scale: 0.05 discriminator_gradient_penalty_scale: 5.0 discriminator_weight_decay_scale: 1.0e-04 diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml index 60be7d18110e..793f972fb7ce 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml @@ -54,7 +54,9 @@ agent: learning_rate_scheduler: KLAdaptiveLR learning_rate_scheduler_kwargs: kl_threshold: 0.016 - state_preprocessor: RunningStandardScaler + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml index 57c1c455185d..31c6087f500a 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: OBSERVATIONS + input: STATES layers: [512, 512, 256, 128] activations: elu output: ONE @@ -54,10 +54,10 @@ agent: learning_rate_scheduler: KLAdaptiveLR learning_rate_scheduler_kwargs: kl_threshold: 0.016 + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null state_preprocessor: RunningStandardScaler state_preprocessor_kwargs: null - shared_state_preprocessor: RunningStandardScaler - shared_state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null random_timesteps: 0