From 09ee5b62b89df4327365366cfa5cd78f3190ce94 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Tue, 16 Sep 2025 17:24:21 -0400 Subject: [PATCH 01/27] Add DDPG, SAC and TD3 agents for the Ant-Direct task --- .../isaaclab_tasks/direct/ant/__init__.py | 3 + .../direct/ant/agents/skrl_ddpg_cfg.yaml | 103 +++++++++++++++ .../direct/ant/agents/skrl_sac_cfg.yaml | 109 +++++++++++++++ .../direct/ant/agents/skrl_td3_cfg.yaml | 124 ++++++++++++++++++ .../isaaclab_tasks/direct/ant/ant_env.py | 4 +- 5 files changed, 342 insertions(+), 1 deletion(-) create mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml create mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml create mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/__init__.py index 5f66eda9885a..cbc7eaceb7de 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/__init__.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/__init__.py @@ -24,5 +24,8 @@ "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AntPPORunnerCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "skrl_ddpg_cfg_entry_point": f"{agents.__name__}:skrl_ddpg_cfg.yaml", + "skrl_sac_cfg_entry_point": f"{agents.__name__}:skrl_sac_cfg.yaml", + "skrl_td3_cfg_entry_point": f"{agents.__name__}:skrl_td3_cfg.yaml", }, ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml new file mode 100644 index 000000000000..33c98c42e020 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml @@ -0,0 +1,103 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers 
(https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [512, 256] + activations: relu + output: tanh(ACTIONS) + target_policy: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [512, 256] + activations: relu + output: tanh(ACTIONS) + critic: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: concatenate([OBSERVATIONS, ACTIONS]) + layers: [512, 256] + activations: relu + output: ONE + target_critic: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: concatenate([OBSERVATIONS, ACTIONS]) + layers: [512, 256] + activations: relu + output: ONE + + +# Replay memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: 16000 + + +# DDPG agent configuration (field names are from DDPG_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/ddpg.html +agent: + class: DDPG + gradient_steps: 1 + batch_size: 4096 + discount_factor: 0.99 + polyak: 0.005 + actor_learning_rate: 5.0e-4 + critic_learning_rate: 5.0e-4 + learning_rate_scheduler: null + learning_rate_scheduler_kwargs: null + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null + state_preprocessor_kwargs: null + random_timesteps: 50 + learning_starts: 50 + grad_norm_clip: 0 + exploration: + noise: OrnsteinUhlenbeckNoise + noise_kwargs: + theta: 0.15 
+ sigma: 0.1 + base_scale: 0.5 + mean: 0.0 + std: 0.1 + initial_scale: 1.0 + final_scale: 1.0e-3 + timesteps: null + rewards_shaper: null + mixed_precision: False + # logging and checkpoint + experiment: + directory: "ant_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 160000 + environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml new file mode 100644 index 000000000000..0573a453fd2c --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml @@ -0,0 +1,109 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see gaussian_model parameters + class: GaussianMixin + clip_actions: False + clip_log_std: True + min_log_std: -20.0 + max_log_std: 2.0 + initial_log_std: 0.0 + network: + - name: net + input: OBSERVATIONS + layers: [512, 256] + activations: relu + output: tanh(ACTIONS) + critic_1: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: concatenate([OBSERVATIONS, ACTIONS]) + layers: [512, 256] + activations: relu + output: ONE + critic_2: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: concatenate([OBSERVATIONS, ACTIONS]) + layers: [512, 256] + activations: relu + output: ONE + target_critic_1: # see deterministic_model parameters + class: 
DeterministicMixin + clip_actions: False + network: + - name: net + input: concatenate([OBSERVATIONS, ACTIONS]) + layers: [512, 256] + activations: relu + output: ONE + target_critic_2: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: concatenate([OBSERVATIONS, ACTIONS]) + layers: [512, 256] + activations: relu + output: ONE + + +# Replay memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: 16000 + + +# SAC agent configuration (field names are from SAC_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/sac.html +agent: + class: SAC + gradient_steps: 1 + batch_size: 4096 + discount_factor: 0.99 + polyak: 0.005 + actor_learning_rate: 5.0e-4 + critic_learning_rate: 5.0e-4 + learning_rate_scheduler: null + learning_rate_scheduler_kwargs: null + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null + state_preprocessor_kwargs: null + random_timesteps: 50 + learning_starts: 50 + grad_norm_clip: 0 + learn_entropy: True + entropy_learning_rate: 1.0e-3 + initial_entropy_value: 0.2 + target_entropy: null + rewards_shaper: null + mixed_precision: False + # logging and checkpoint + experiment: + directory: "ant_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 160000 + environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml new file mode 100644 index 000000000000..d8a34a3c10d5 --- /dev/null +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml @@ -0,0 +1,124 @@ +# Copyright (c) 2022-2025, The Isaac Lab Project Developers 
(https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +seed: 42 + + +# Models are instantiated using skrl's model instantiator utility +# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html +models: + separate: True + policy: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [512, 256] + activations: relu + output: tanh(ACTIONS) + target_policy: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: OBSERVATIONS + layers: [512, 256] + activations: relu + output: tanh(ACTIONS) + critic_1: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: concatenate([OBSERVATIONS, ACTIONS]) + layers: [512, 256] + activations: relu + output: ONE + critic_2: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: concatenate([OBSERVATIONS, ACTIONS]) + layers: [512, 256] + activations: relu + output: ONE + target_critic_1: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: concatenate([OBSERVATIONS, ACTIONS]) + layers: [512, 256] + activations: relu + output: ONE + target_critic_2: # see deterministic_model parameters + class: DeterministicMixin + clip_actions: False + network: + - name: net + input: concatenate([OBSERVATIONS, ACTIONS]) + layers: [512, 256] + activations: relu + output: ONE + + +# Replay memory +# https://skrl.readthedocs.io/en/latest/api/memories/random.html +memory: + class: RandomMemory + memory_size: 16000 + + +# TD3 agent configuration (field names are from TD3_DEFAULT_CONFIG) +# https://skrl.readthedocs.io/en/latest/api/agents/td3.html +agent: + class: TD3 + gradient_steps: 1 + batch_size: 4096 + 
discount_factor: 0.99 + polyak: 0.005 + actor_learning_rate: 5.0e-4 + critic_learning_rate: 5.0e-4 + learning_rate_scheduler: null + learning_rate_scheduler_kwargs: null + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null + state_preprocessor_kwargs: null + random_timesteps: 50 + learning_starts: 50 + grad_norm_clip: 0 + exploration: + noise: GaussianNoise + noise_kwargs: + mean: 0.0 + std: 0.1 + initial_scale: 1.0 + final_scale: 1.0e-3 + timesteps: null + policy_delay: 2 + smooth_regularization_noise: GaussianNoise + smooth_regularization_noise_kwargs: + mean: 0.0 + std: 0.2 + smooth_regularization_clip: 0.5 + rewards_shaper: null + mixed_precision: False + # logging and checkpoint + experiment: + directory: "ant_direct" + experiment_name: "" + write_interval: auto + checkpoint_interval: auto + + +# Sequential trainer +# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html +trainer: + class: SequentialTrainer + timesteps: 160000 + environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py index c63b42acb384..746f8a9dcaf2 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py @@ -5,6 +5,8 @@ from __future__ import annotations +import gymnasium as gym + from isaaclab_assets.robots.ant import ANT_CFG import isaaclab.sim as sim_utils @@ -24,7 +26,7 @@ class AntEnvCfg(DirectRLEnvCfg): episode_length_s = 15.0 decimation = 2 action_scale = 0.5 - action_space = 8 + action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(8,)) # bounded space (needed for random exploration) observation_space = 36 state_space = 0 From c04d746f98ff07e64a4a1cfb6d6366a5086b4bfc Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Tue, 16 Sep 2025 17:28:22 -0400 Subject: [PATCH 02/27] Fix algorithm definition when specifying the --agent 
argument --- scripts/reinforcement_learning/skrl/play.py | 1 + scripts/reinforcement_learning/skrl/train.py | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index b4d52c39e8c8..6be6b0eae3b4 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -121,6 +121,7 @@ agent_cfg_entry_point = "skrl_cfg_entry_point" if algorithm in ["ppo"] else f"skrl_{algorithm}_cfg_entry_point" else: agent_cfg_entry_point = args_cli.agent + algorithm = agent_cfg_entry_point.split("_cfg")[0].split("skrl_")[-1].lower() @hydra_task_config(args_cli.task, agent_cfg_entry_point) diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index 83bd49f94f95..2eb087146118 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -119,6 +119,7 @@ agent_cfg_entry_point = "skrl_cfg_entry_point" if algorithm in ["ppo"] else f"skrl_{algorithm}_cfg_entry_point" else: agent_cfg_entry_point = args_cli.agent + algorithm = agent_cfg_entry_point.split("_cfg")[0].split("skrl_")[-1].lower() @hydra_task_config(args_cli.task, agent_cfg_entry_point) From beb5b0c3365257191fca19062a71a84675ee6b5d Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Tue, 16 Sep 2025 17:34:03 -0400 Subject: [PATCH 03/27] Replace STATES by OBSERVATIONS when defining model's inputs --- .../direct/allegro_hand/agents/skrl_ppo_cfg.yaml | 4 ++-- .../isaaclab_tasks/direct/ant/agents/skrl_ppo_cfg.yaml | 4 ++-- .../direct/anymal_c/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- .../direct/anymal_c/agents/skrl_rough_ppo_cfg.yaml | 4 ++-- .../direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml | 4 ++-- .../direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml | 4 ++-- .../direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml | 4 ++-- .../direct/cartpole/agents/skrl_camera_ppo_cfg.yaml | 4 ++-- 
.../isaaclab_tasks/direct/cartpole/agents/skrl_ppo_cfg.yaml | 4 ++-- .../direct/franka_cabinet/agents/skrl_ppo_cfg.yaml | 4 ++-- .../isaaclab_tasks/direct/humanoid/agents/skrl_ppo_cfg.yaml | 4 ++-- .../direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml | 6 +++--- .../direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml | 6 +++--- .../direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml | 6 +++--- .../direct/quadcopter/agents/skrl_ppo_cfg.yaml | 4 ++-- .../direct/shadow_hand/agents/skrl_ff_ppo_cfg.yaml | 4 ++-- .../direct/shadow_hand/agents/skrl_ppo_cfg.yaml | 4 ++-- .../direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml | 4 ++-- .../direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml | 4 ++-- .../direct/shadow_hand_over/agents/skrl_ppo_cfg.yaml | 4 ++-- .../manager_based/classic/ant/agents/skrl_ppo_cfg.yaml | 4 ++-- .../manager_based/classic/cartpole/agents/skrl_ppo_cfg.yaml | 4 ++-- .../manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml | 4 ++-- .../velocity/config/a1/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- .../velocity/config/a1/agents/skrl_rough_ppo_cfg.yaml | 4 ++-- .../velocity/config/anymal_b/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- .../velocity/config/anymal_b/agents/skrl_rough_ppo_cfg.yaml | 4 ++-- .../velocity/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- .../velocity/config/anymal_c/agents/skrl_rough_ppo_cfg.yaml | 4 ++-- .../velocity/config/anymal_d/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- .../velocity/config/anymal_d/agents/skrl_rough_ppo_cfg.yaml | 4 ++-- .../velocity/config/cassie/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- .../velocity/config/cassie/agents/skrl_rough_ppo_cfg.yaml | 4 ++-- .../velocity/config/g1/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- .../velocity/config/g1/agents/skrl_rough_ppo_cfg.yaml | 4 ++-- .../velocity/config/go1/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- .../velocity/config/go1/agents/skrl_rough_ppo_cfg.yaml | 4 ++-- .../velocity/config/go2/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- .../velocity/config/go2/agents/skrl_rough_ppo_cfg.yaml | 4 
++-- .../velocity/config/h1/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- .../velocity/config/h1/agents/skrl_rough_ppo_cfg.yaml | 4 ++-- .../velocity/config/spot/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- .../cabinet/config/franka/agents/skrl_ppo_cfg.yaml | 4 ++-- .../inhand/config/allegro_hand/agents/skrl_ppo_cfg.yaml | 4 ++-- .../lift/config/franka/agents/skrl_ppo_cfg.yaml | 4 ++-- .../reach/config/franka/agents/skrl_ppo_cfg.yaml | 4 ++-- .../reach/config/ur_10/agents/skrl_ppo_cfg.yaml | 4 ++-- .../config/anymal_c/agents/skrl_flat_ppo_cfg.yaml | 4 ++-- 48 files changed, 99 insertions(+), 99 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/skrl_ppo_cfg.yaml index 1d0eb42d37c9..42917104e36d 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/allegro_hand/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [1024, 512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [1024, 512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ppo_cfg.yaml index 9701ac0a8c58..78dcc9de5d1d 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 64] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 64] activations: elu 
output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/skrl_flat_ppo_cfg.yaml index bcaf9abbb5c1..693ca6c2b306 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/skrl_rough_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/skrl_rough_ppo_cfg.yaml index 63d05fb1364c..f235de692af0 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/skrl_rough_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/anymal_c/agents/skrl_rough_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml index 2ddc221af819..2f66ad8d20ad 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS 
layers: [32, 32] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [32, 32] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml index 7d9885205d4d..ee30acb3484a 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [32, 32] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [32, 32] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml index cd8fff7ba72b..c053b5b00353 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [32, 32] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [32, 32] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/skrl_camera_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/skrl_camera_ppo_cfg.yaml index 18719d99197d..17fcf9c72715 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/skrl_camera_ppo_cfg.yaml +++ 
b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/skrl_camera_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: features_extractor - input: permute(STATES, (0, 3, 1, 2)) # PyTorch NHWC -> NCHW. Warning: don't permute for JAX since it expects NHWC + input: permute(OBSERVATIONS, (0, 3, 1, 2)) # PyTorch NHWC -> NCHW. Warning: don't permute for JAX since it expects NHWC layers: - conv2d: {out_channels: 32, kernel_size: 8, stride: 4, padding: 0} - conv2d: {out_channels: 64, kernel_size: 4, stride: 2, padding: 0} @@ -36,7 +36,7 @@ models: clip_actions: False network: - name: features_extractor - input: permute(STATES, (0, 3, 1, 2)) # PyTorch NHWC -> NCHW. Warning: don't permute for JAX since it expects NHWC + input: permute(OBSERVATIONS, (0, 3, 1, 2)) # PyTorch NHWC -> NCHW. Warning: don't permute for JAX since it expects NHWC layers: - conv2d: {out_channels: 32, kernel_size: 8, stride: 4, padding: 0} - conv2d: {out_channels: 64, kernel_size: 4, stride: 2, padding: 0} diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/skrl_ppo_cfg.yaml index 661acc55badd..83bcf50162a9 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [32, 32] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [32, 32] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/skrl_ppo_cfg.yaml index 41a56f82fc27..d1cf5a6b5df6 100644 --- 
a/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/franka_cabinet/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 64] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 64] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/skrl_ppo_cfg.yaml index aa0786091eea..130d1999ec37 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [400, 200, 100] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [400, 200, 100] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml index 6b26961e3b6c..090d5eb90a69 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml @@ -20,7 +20,7 @@ models: fixed_log_std: True network: - name: net - input: STATES + input: OBSERVATIONS layers: [1024, 512] activations: relu output: ACTIONS @@ -29,7 +29,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [1024, 512] activations: relu output: ONE @@ -38,7 +38,7 @@ models: clip_actions: False network: - name: net - 
input: STATES + input: OBSERVATIONS layers: [1024, 512] activations: relu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml index 4571db8777c8..f74cecfeb64f 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml @@ -20,7 +20,7 @@ models: fixed_log_std: True network: - name: net - input: STATES + input: OBSERVATIONS layers: [1024, 512] activations: relu output: ACTIONS @@ -29,7 +29,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [1024, 512] activations: relu output: ONE @@ -38,7 +38,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [1024, 512] activations: relu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml index 7cfa1dc367a7..727258be3ca6 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml @@ -20,7 +20,7 @@ models: fixed_log_std: True network: - name: net - input: STATES + input: OBSERVATIONS layers: [1024, 512] activations: relu output: ACTIONS @@ -29,7 +29,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [1024, 512] activations: relu output: ONE @@ -38,7 +38,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [1024, 512] activations: relu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/skrl_ppo_cfg.yaml 
b/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/skrl_ppo_cfg.yaml index bd7ac17eec0b..3353c5786af2 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/quadcopter/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [64, 64] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [64, 64] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/skrl_ff_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/skrl_ff_ppo_cfg.yaml index 9d4da11bbbbc..7ef224f78ebf 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/skrl_ff_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/skrl_ff_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [400, 400, 200, 100] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/skrl_ppo_cfg.yaml index d0d82c6c77e7..cae9a8445e34 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS 
layers: [512, 512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml index c9bf684b0082..84f23d446f6e 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml index 7dd38e3096d3..479219a86288 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ppo_cfg.yaml index 38b8f6ce0142..789738bdf907 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 
0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/skrl_ppo_cfg.yaml index 48eaa50c03cd..4375afee0cb5 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/ant/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 64] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 64] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/skrl_ppo_cfg.yaml index d5c8157ce353..4a2b308e670d 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/cartpole/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [32, 32] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [32, 32] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml index 
d471c535f915..e9f3913a029b 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/classic/humanoid/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [400, 200, 100] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [400, 200, 100] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/skrl_flat_ppo_cfg.yaml index 3ef50e08dcc7..873657e3578a 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/skrl_rough_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/skrl_rough_ppo_cfg.yaml index 7c4577efc4ee..b8227096f5d2 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/skrl_rough_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/a1/agents/skrl_rough_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: 
OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/skrl_flat_ppo_cfg.yaml index e6c7fdc17c03..d8c336da407f 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/skrl_rough_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/skrl_rough_ppo_cfg.yaml index 4ea1d0a4044e..2273df9c37d3 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/skrl_rough_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_b/agents/skrl_rough_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git 
a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml index e8fb16d26cbc..f0942278b837 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_rough_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_rough_ppo_cfg.yaml index 3c929fa0ee87..5c7fedf07b00 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_rough_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_c/agents/skrl_rough_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/skrl_flat_ppo_cfg.yaml index 33627d76a3eb..88a2bc75b25f 100644 --- 
a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/skrl_rough_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/skrl_rough_ppo_cfg.yaml index ea54efbb14e3..9df85573ef5e 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/skrl_rough_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/skrl_rough_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/skrl_flat_ppo_cfg.yaml index 43ddef1bcd7e..dd80f5fd1965 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 
network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/skrl_rough_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/skrl_rough_ppo_cfg.yaml index db92e1f86ce7..883148f878ec 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/skrl_rough_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/cassie/agents/skrl_rough_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/skrl_flat_ppo_cfg.yaml index 3aa086273828..b6ecdf1f3013 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 128] activations: elu output: ONE diff --git 
a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/skrl_rough_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/skrl_rough_ppo_cfg.yaml index 3d9390bf722b..6013e3f070d3 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/skrl_rough_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/g1/agents/skrl_rough_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/skrl_flat_ppo_cfg.yaml index 51445b2aadb0..7cd7c9bb5b5b 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/skrl_rough_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/skrl_rough_ppo_cfg.yaml index cbd8389751c0..79daaec43f2b 100644 --- 
a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/skrl_rough_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go1/agents/skrl_rough_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/skrl_flat_ppo_cfg.yaml index e7be95a91962..1b3ecf74fd53 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/skrl_rough_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/skrl_rough_ppo_cfg.yaml index 4fef61da4a35..aeffb439a172 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/skrl_rough_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/go2/agents/skrl_rough_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + 
input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/skrl_flat_ppo_cfg.yaml index a6166fcb1d37..1bcc39eb42ef 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/skrl_rough_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/skrl_rough_ppo_cfg.yaml index d111bdc80248..7538f906a217 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/skrl_rough_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/h1/agents/skrl_rough_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git 
a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/skrl_flat_ppo_cfg.yaml index 104e205d4b61..c380e841e4c0 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/locomotion/velocity/config/spot/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/skrl_ppo_cfg.yaml index 341db684146d..4e81f3673de6 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/cabinet/config/franka/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 64] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 64] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/skrl_ppo_cfg.yaml index 1537f0d4c446..6e12c4940faa 100644 --- 
a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/inhand/config/allegro_hand/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [512, 256, 128] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/skrl_ppo_cfg.yaml index 6d5d34de5a33..5ddcf1713e75 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/lift/config/franka/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 64] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [256, 128, 64] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/skrl_ppo_cfg.yaml index 62cef0dde2d9..d6cf3c8dd251 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/franka/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: 
[64, 64] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [64, 64] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/skrl_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/skrl_ppo_cfg.yaml index f6412089ff08..f14c8a6094b5 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/skrl_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/reach/config/ur_10/agents/skrl_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: 0.0 network: - name: net - input: STATES + input: OBSERVATIONS layers: [64, 64] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [64, 64] activations: elu output: ONE diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml index 005f95806d16..5473188cbd86 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/agents/skrl_flat_ppo_cfg.yaml @@ -19,7 +19,7 @@ models: initial_log_std: -0.6931471805599453 network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128] activations: elu output: ACTIONS @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: STATES + input: OBSERVATIONS layers: [128, 128] activations: elu output: ONE From 6bb1e5d2fad44a97671d5b17c092b01849514b80 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Thu, 16 Oct 2025 14:53:56 -0400 Subject: [PATCH 04/27] Update AMP 
agent config --- .../humanoid_amp/agents/skrl_dance_amp_cfg.yaml | 13 +++++++------ .../humanoid_amp/agents/skrl_run_amp_cfg.yaml | 13 +++++++------ .../humanoid_amp/agents/skrl_walk_amp_cfg.yaml | 13 +++++++------ 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml index 090d5eb90a69..0e3801bfce0f 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_dance_amp_cfg.yaml @@ -75,12 +75,14 @@ agent: learning_rate: 5.0e-05 learning_rate_scheduler: null learning_rate_scheduler_kwargs: null - state_preprocessor: RunningStandardScaler + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null - amp_state_preprocessor: RunningStandardScaler - amp_state_preprocessor_kwargs: null + amp_observation_preprocessor: RunningStandardScaler + amp_observation_preprocessor_kwargs: null random_timesteps: 0 learning_starts: 0 grad_norm_clip: 0.0 @@ -91,10 +93,9 @@ agent: value_loss_scale: 2.5 discriminator_loss_scale: 5.0 amp_batch_size: 512 - task_reward_weight: 0.0 - style_reward_weight: 1.0 + task_reward_scale: 0.0 + style_reward_scale: 2.0 discriminator_batch_size: 4096 - discriminator_reward_scale: 2.0 discriminator_logit_regularization_scale: 0.05 discriminator_gradient_penalty_scale: 5.0 discriminator_weight_decay_scale: 1.0e-04 diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml index f74cecfeb64f..15295d66357a 100644 --- 
a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_run_amp_cfg.yaml @@ -75,12 +75,14 @@ agent: learning_rate: 5.0e-05 learning_rate_scheduler: null learning_rate_scheduler_kwargs: null - state_preprocessor: RunningStandardScaler + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null - amp_state_preprocessor: RunningStandardScaler - amp_state_preprocessor_kwargs: null + amp_observation_preprocessor: RunningStandardScaler + amp_observation_preprocessor_kwargs: null random_timesteps: 0 learning_starts: 0 grad_norm_clip: 0.0 @@ -91,10 +93,9 @@ agent: value_loss_scale: 2.5 discriminator_loss_scale: 5.0 amp_batch_size: 512 - task_reward_weight: 0.0 - style_reward_weight: 1.0 + task_reward_scale: 0.0 + style_reward_scale: 2.0 discriminator_batch_size: 4096 - discriminator_reward_scale: 2.0 discriminator_logit_regularization_scale: 0.05 discriminator_gradient_penalty_scale: 5.0 discriminator_weight_decay_scale: 1.0e-04 diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml index 727258be3ca6..6d86d8e8f804 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/humanoid_amp/agents/skrl_walk_amp_cfg.yaml @@ -75,12 +75,14 @@ agent: learning_rate: 5.0e-05 learning_rate_scheduler: null learning_rate_scheduler_kwargs: null - state_preprocessor: RunningStandardScaler + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: 
null - amp_state_preprocessor: RunningStandardScaler - amp_state_preprocessor_kwargs: null + amp_observation_preprocessor: RunningStandardScaler + amp_observation_preprocessor_kwargs: null random_timesteps: 0 learning_starts: 0 grad_norm_clip: 0.0 @@ -91,10 +93,9 @@ agent: value_loss_scale: 2.5 discriminator_loss_scale: 5.0 amp_batch_size: 512 - task_reward_weight: 0.0 - style_reward_weight: 1.0 + task_reward_scale: 0.0 + style_reward_scale: 2.0 discriminator_batch_size: 4096 - discriminator_reward_scale: 2.0 discriminator_logit_regularization_scale: 0.05 discriminator_gradient_penalty_scale: 5.0 discriminator_weight_decay_scale: 1.0e-04 From d6b22cead5f967b4218b8d9921a086b0df589b5b Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Sun, 19 Oct 2025 10:25:00 -0400 Subject: [PATCH 05/27] Update multi-agent config --- .../direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml | 4 +++- .../direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml | 6 +++--- .../direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml | 4 +++- .../direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml | 6 +++--- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml index 2f66ad8d20ad..01febf74d54f 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_ippo_cfg.yaml @@ -54,7 +54,9 @@ agent: learning_rate_scheduler: KLAdaptiveLR learning_rate_scheduler_kwargs: kl_threshold: 0.008 - state_preprocessor: RunningStandardScaler + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null diff --git 
a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml index ee30acb3484a..c15d5c8d6459 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cart_double_pendulum/agents/skrl_mappo_cfg.yaml @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: OBSERVATIONS + input: STATES layers: [32, 32] activations: elu output: ONE @@ -54,10 +54,10 @@ agent: learning_rate_scheduler: KLAdaptiveLR learning_rate_scheduler_kwargs: kl_threshold: 0.008 + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null state_preprocessor: RunningStandardScaler state_preprocessor_kwargs: null - shared_state_preprocessor: RunningStandardScaler - shared_state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null random_timesteps: 0 diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml index 84f23d446f6e..b36c4624e3fe 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_ippo_cfg.yaml @@ -54,7 +54,9 @@ agent: learning_rate_scheduler: KLAdaptiveLR learning_rate_scheduler_kwargs: kl_threshold: 0.016 - state_preprocessor: RunningStandardScaler + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null + state_preprocessor: null state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml 
b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml index 479219a86288..8b81a3cb7ef2 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand_over/agents/skrl_mappo_cfg.yaml @@ -28,7 +28,7 @@ models: clip_actions: False network: - name: net - input: OBSERVATIONS + input: STATES layers: [512, 512, 256, 128] activations: elu output: ONE @@ -54,10 +54,10 @@ agent: learning_rate_scheduler: KLAdaptiveLR learning_rate_scheduler_kwargs: kl_threshold: 0.016 + observation_preprocessor: RunningStandardScaler + observation_preprocessor_kwargs: null state_preprocessor: RunningStandardScaler state_preprocessor_kwargs: null - shared_state_preprocessor: RunningStandardScaler - shared_state_preprocessor_kwargs: null value_preprocessor: RunningStandardScaler value_preprocessor_kwargs: null random_timesteps: 0 From 8261a637f9a12b58977b70f0fdb39532f553d191 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Sun, 19 Oct 2025 14:51:25 -0400 Subject: [PATCH 06/27] Update off-policy agents config --- .../direct/ant/agents/skrl_ddpg_cfg.yaml | 22 ++++++++----------- .../direct/ant/agents/skrl_sac_cfg.yaml | 4 +--- .../direct/ant/agents/skrl_td3_cfg.yaml | 16 +++++--------- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml index 33c98c42e020..51f0a31d1efc 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml @@ -63,8 +63,7 @@ agent: batch_size: 4096 discount_factor: 0.99 polyak: 0.005 - actor_learning_rate: 5.0e-4 - critic_learning_rate: 5.0e-4 + learning_rate: 5.0e-4 learning_rate_scheduler: null learning_rate_scheduler_kwargs: null observation_preprocessor: 
RunningStandardScaler @@ -74,17 +73,14 @@ agent: random_timesteps: 50 learning_starts: 50 grad_norm_clip: 0 - exploration: - noise: OrnsteinUhlenbeckNoise - noise_kwargs: - theta: 0.15 - sigma: 0.1 - base_scale: 0.5 - mean: 0.0 - std: 0.1 - initial_scale: 1.0 - final_scale: 1.0e-3 - timesteps: null + exploration_noise: OrnsteinUhlenbeckNoise + exploration_noise_kwargs: + theta: 0.15 + sigma: 0.1 + base_scale: 0.5 + mean: 0.0 + std: 0.1 + exploration_scheduler: max(1 - timestep / timesteps, 0.01) rewards_shaper: null mixed_precision: False # logging and checkpoint diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml index 0573a453fd2c..4f244dcbddcc 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml @@ -76,8 +76,7 @@ agent: batch_size: 4096 discount_factor: 0.99 polyak: 0.005 - actor_learning_rate: 5.0e-4 - critic_learning_rate: 5.0e-4 + learning_rate: 5.0e-4 learning_rate_scheduler: null learning_rate_scheduler_kwargs: null observation_preprocessor: RunningStandardScaler @@ -88,7 +87,6 @@ agent: learning_starts: 50 grad_norm_clip: 0 learn_entropy: True - entropy_learning_rate: 1.0e-3 initial_entropy_value: 0.2 target_entropy: null rewards_shaper: null diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml index d8a34a3c10d5..77ca52d5cbed 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml @@ -81,8 +81,7 @@ agent: batch_size: 4096 discount_factor: 0.99 polyak: 0.005 - actor_learning_rate: 5.0e-4 - critic_learning_rate: 5.0e-4 + learning_rate: 5.0e-4 learning_rate_scheduler: null learning_rate_scheduler_kwargs: null 
observation_preprocessor: RunningStandardScaler @@ -92,14 +91,11 @@ agent: random_timesteps: 50 learning_starts: 50 grad_norm_clip: 0 - exploration: - noise: GaussianNoise - noise_kwargs: - mean: 0.0 - std: 0.1 - initial_scale: 1.0 - final_scale: 1.0e-3 - timesteps: null + exploration_noise: GaussianNoise + exploration_noise_kwargs: + mean: 0.0 + std: 0.1 + exploration_scheduler: max(1 - timestep / timesteps, 0.01) policy_delay: 2 smooth_regularization_noise: GaussianNoise smooth_regularization_noise_kwargs: From b966ecc24ff62553affeecb62f69f9cf80f01efa Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Sun, 19 Oct 2025 15:06:51 -0400 Subject: [PATCH 07/27] Add DDPG, SAC and TD3 choices to argparse --- scripts/reinforcement_learning/skrl/play.py | 2 +- scripts/reinforcement_learning/skrl/train.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index 6be6b0eae3b4..81c89a50eb76 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -53,7 +53,7 @@ "--algorithm", type=str, default="PPO", - choices=["AMP", "PPO", "IPPO", "MAPPO"], + choices=["AMP", "DDPG", "IPPO", "MAPPO", "PPO", "SAC", "TD3"], help="The RL algorithm used for training the skrl agent.", ) parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index 2eb087146118..4662f334a04e 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -51,7 +51,7 @@ "--algorithm", type=str, default="PPO", - choices=["AMP", "PPO", "IPPO", "MAPPO"], + choices=["AMP", "DDPG", "IPPO", "MAPPO", "PPO", "SAC", "TD3"], help="The RL algorithm used for training the skrl agent.", ) From 414beaada0ba00324ab22de5e36e4742073799e3 Mon Sep 17 00:00:00 2001 
From: Antonio Serrano Munoz Date: Sun, 19 Oct 2025 15:36:32 -0400 Subject: [PATCH 08/27] Normalizes line endings for docs/make.bat --- docs/make.bat | 130 +++++++++++++++++++++++++------------------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/docs/make.bat b/docs/make.bat index 941689ef03c8..676a3abc67d6 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -1,65 +1,65 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file to build Sphinx documentation - -set SOURCEDIR=. -set BUILDDIR=_build - -REM Check if a specific target was passed -if "%1" == "multi-docs" ( - REM Check if SPHINXBUILD is set, if not default to sphinx-multiversion - if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-multiversion - ) - where %SPHINXBUILD% >NUL 2>NUL - if errorlevel 1 ( - echo. - echo.The 'sphinx-multiversion' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-multiversion' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 - ) - %SPHINXBUILD% %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - - REM Copy the redirect index.html to the build directory - copy _redirect\index.html %BUILDDIR%\index.html - goto end -) - -if "%1" == "current-docs" ( - REM Check if SPHINXBUILD is set, if not default to sphinx-build - if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build - ) - where %SPHINXBUILD% >NUL 2>NUL - if errorlevel 1 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. 
- echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 - ) - if exist "%BUILDDIR%\current" rmdir /s /q "%BUILDDIR%\current" - %SPHINXBUILD% -W "%SOURCEDIR%" "%BUILDDIR%\current" %SPHINXOPTS% - goto end -) - -REM If no valid target is passed, show usage instructions -echo. -echo.Usage: -echo. make.bat multi-docs - To build the multi-version documentation. -echo. make.bat current-docs - To build the current documentation. -echo. - -:end -popd +@ECHO OFF + +pushd %~dp0 + +REM Command file to build Sphinx documentation + +set SOURCEDIR=. +set BUILDDIR=_build + +REM Check if a specific target was passed +if "%1" == "multi-docs" ( + REM Check if SPHINXBUILD is set, if not default to sphinx-multiversion + if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-multiversion + ) + where %SPHINXBUILD% >NUL 2>NUL + if errorlevel 1 ( + echo. + echo.The 'sphinx-multiversion' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-multiversion' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 + ) + %SPHINXBUILD% %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + + REM Copy the redirect index.html to the build directory + copy _redirect\index.html %BUILDDIR%\index.html + goto end +) + +if "%1" == "current-docs" ( + REM Check if SPHINXBUILD is set, if not default to sphinx-build + if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build + ) + where %SPHINXBUILD% >NUL 2>NUL + if errorlevel 1 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 + ) + if exist "%BUILDDIR%\current" rmdir /s /q "%BUILDDIR%\current" + %SPHINXBUILD% -W "%SOURCEDIR%" "%BUILDDIR%\current" %SPHINXOPTS% + goto end +) + +REM If no valid target is passed, show usage instructions +echo. +echo.Usage: +echo. make.bat multi-docs - To build the multi-version documentation. +echo. make.bat current-docs - To build the current documentation. +echo. + +:end +popd From f58a019b977c8469b31faaae6f0d87fcf8366bfd Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Sun, 19 Oct 2025 15:58:32 -0400 Subject: [PATCH 09/27] Update DDPG exploration noise arguments --- .../isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml index 51f0a31d1efc..8383b5b423f3 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml @@ -79,7 +79,7 @@ agent: sigma: 0.1 base_scale: 0.5 mean: 0.0 - std: 0.1 + std: 1.0 exploration_scheduler: max(1 - timestep / timesteps, 0.01) rewards_shaper: null mixed_precision: False From 406ed224c4121936537510aa27934fbdbe873c94 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Thu, 13 Nov 2025 09:59:51 -0500 Subject: [PATCH 10/27] Add Warp implementation --- scripts/reinforcement_learning/skrl/train.py | 4 +++- source/isaaclab_rl/isaaclab_rl/skrl.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index 7f177890705b..3ddf7ac8fe41 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -44,7 +44,7 @@ "--ml_framework", type=str, 
default="torch", - choices=["torch", "jax", "jax-numpy"], + choices=["torch", "jax", "jax-numpy", "warp"], help="The ML framework used for training the skrl agent.", ) parser.add_argument( @@ -94,6 +94,8 @@ from skrl.utils.runner.torch import Runner elif args_cli.ml_framework.startswith("jax"): from skrl.utils.runner.jax import Runner +elif args_cli.ml_framework.startswith("warp"): + from skrl.utils.runner.warp import Runner from isaaclab.envs import ( DirectMARLEnv, diff --git a/source/isaaclab_rl/isaaclab_rl/skrl.py b/source/isaaclab_rl/isaaclab_rl/skrl.py index 3e5661dedd49..afca08b0f8d7 100644 --- a/source/isaaclab_rl/isaaclab_rl/skrl.py +++ b/source/isaaclab_rl/isaaclab_rl/skrl.py @@ -38,7 +38,7 @@ def SkrlVecEnvWrapper( env: ManagerBasedRLEnv | DirectRLEnv | DirectMARLEnv, - ml_framework: Literal["torch", "jax", "jax-numpy"] = "torch", + ml_framework: Literal["torch", "jax", "jax-numpy", "warp"] = "torch", wrapper: Literal["auto", "isaaclab", "isaaclab-single-agent", "isaaclab-multi-agent"] = "isaaclab", ): """Wraps around Isaac Lab environment for skrl. @@ -77,9 +77,11 @@ def SkrlVecEnvWrapper( from skrl.envs.wrappers.torch import wrap_env elif ml_framework.startswith("jax"): from skrl.envs.wrappers.jax import wrap_env + elif ml_framework.startswith("warp"): + from skrl.envs.wrappers.warp import wrap_env else: ValueError( - f"Invalid ML framework for skrl: {ml_framework}. Available options are: 'torch', 'jax' or 'jax-numpy'" + f"Invalid ML framework for skrl: {ml_framework}. 
Available options are: 'torch', 'jax', 'jax-numpy', 'warp'" ) # wrap and return the environment From 817a6930f9c7d745dc44b80c0d268417baadd391 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Wed, 3 Dec 2025 10:46:34 -0500 Subject: [PATCH 11/27] Remove jax-numpy backend --- scripts/reinforcement_learning/skrl/play.py | 2 +- scripts/reinforcement_learning/skrl/train.py | 2 +- source/isaaclab_rl/isaaclab_rl/skrl.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index 81c89a50eb76..d1a86b77ec1a 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -46,7 +46,7 @@ "--ml_framework", type=str, default="torch", - choices=["torch", "jax", "jax-numpy"], + choices=["torch", "jax", "warp"], help="The ML framework used for training the skrl agent.", ) parser.add_argument( diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index aabcf6ce98c8..cd2f31cac568 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -44,7 +44,7 @@ "--ml_framework", type=str, default="torch", - choices=["torch", "jax", "jax-numpy", "warp"], + choices=["torch", "jax", "warp"], help="The ML framework used for training the skrl agent.", ) parser.add_argument( diff --git a/source/isaaclab_rl/isaaclab_rl/skrl.py b/source/isaaclab_rl/isaaclab_rl/skrl.py index afca08b0f8d7..9abfc5e94a69 100644 --- a/source/isaaclab_rl/isaaclab_rl/skrl.py +++ b/source/isaaclab_rl/isaaclab_rl/skrl.py @@ -38,7 +38,7 @@ def SkrlVecEnvWrapper( env: ManagerBasedRLEnv | DirectRLEnv | DirectMARLEnv, - ml_framework: Literal["torch", "jax", "jax-numpy", "warp"] = "torch", + ml_framework: Literal["torch", "jax", "warp"] = "torch", wrapper: Literal["auto", "isaaclab", "isaaclab-single-agent", "isaaclab-multi-agent"] = "isaaclab", ): """Wraps around Isaac 
Lab environment for skrl. @@ -81,7 +81,7 @@ def SkrlVecEnvWrapper( from skrl.envs.wrappers.warp import wrap_env else: ValueError( - f"Invalid ML framework for skrl: {ml_framework}. Available options are: 'torch', 'jax', 'jax-numpy', 'warp'" + f"Invalid ML framework for skrl: {ml_framework}. Available options are: 'torch', 'jax', 'warp'" ) # wrap and return the environment From 60834ea2471833d18bcc1f3207a26ecdaccbdbfc Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Wed, 3 Dec 2025 10:50:15 -0500 Subject: [PATCH 12/27] Import warp runner in play.py script --- scripts/reinforcement_learning/skrl/play.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index d1a86b77ec1a..adf4b5df724d 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -96,6 +96,8 @@ from skrl.utils.runner.torch import Runner elif args_cli.ml_framework.startswith("jax"): from skrl.utils.runner.jax import Runner +elif args_cli.ml_framework.startswith("warp"): + from skrl.utils.runner.warp import Runner from isaaclab.envs import ( DirectMARLEnv, From 8c899e2b02615a2304d9e51aa8054d1f45644c68 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Wed, 3 Dec 2025 11:16:43 -0500 Subject: [PATCH 13/27] Apply pre-commit --- source/isaaclab_rl/isaaclab_rl/skrl.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/source/isaaclab_rl/isaaclab_rl/skrl.py b/source/isaaclab_rl/isaaclab_rl/skrl.py index 9abfc5e94a69..f50862142da0 100644 --- a/source/isaaclab_rl/isaaclab_rl/skrl.py +++ b/source/isaaclab_rl/isaaclab_rl/skrl.py @@ -80,9 +80,7 @@ def SkrlVecEnvWrapper( elif ml_framework.startswith("warp"): from skrl.envs.wrappers.warp import wrap_env else: - ValueError( - f"Invalid ML framework for skrl: {ml_framework}. Available options are: 'torch', 'jax', 'warp'" - ) + ValueError(f"Invalid ML framework for skrl: {ml_framework}. 
Available options are: 'torch', 'jax', 'warp'") # wrap and return the environment return wrap_env(env, wrapper) From 2a865f1611d5d4e8d6a512831fe4731eb3faeb5f Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Wed, 3 Dec 2025 12:25:22 -0500 Subject: [PATCH 14/27] Update play.py script --- scripts/reinforcement_learning/skrl/play.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index adf4b5df724d..28a02e722a2a 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -209,10 +209,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, expe print(f"[INFO] Loading model checkpoint from: {resume_path}") runner.agent.load(resume_path) # set agent to evaluation mode - runner.agent.set_running_mode("eval") + runner.agent.enable_training_mode(False, apply_to_models=True) # reset environment obs, _ = env.reset() + states = env.state() timestep = 0 # simulate environment while simulation_app.is_running(): @@ -221,7 +222,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, expe # run everything in inference mode with torch.inference_mode(): # agent stepping - outputs = runner.agent.act(obs, timestep=0, timesteps=0) + outputs = runner.agent.act(obs, states, timestep=0, timesteps=0) # - multi-agent (deterministic) actions if hasattr(env, "possible_agents"): actions = {a: outputs[-1][a].get("mean_actions", outputs[0][a]) for a in env.possible_agents} @@ -230,6 +231,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, expe actions = outputs[-1].get("mean_actions", outputs[0]) # env stepping obs, _, _, _, _ = env.step(actions) + states = env.state() if args_cli.video: timestep += 1 # exit the play loop after recording one video From 259e02387783c3d23d1e932132ec65c7bda5905c Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz 
Date: Fri, 27 Mar 2026 09:41:14 +0100 Subject: [PATCH 15/27] Apply format --- .../isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml | 2 +- .../isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml | 2 +- .../isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml | 2 +- source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py | 2 -- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml index 8383b5b423f3..bab310c08026 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). # All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml index 4f244dcbddcc..90697ddec035 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). # All rights reserved. 
# # SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml index 77ca52d5cbed..9cd0c6010c2b 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). # All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py index a7c101f71303..6e3a139c3cc0 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py @@ -7,8 +7,6 @@ import gymnasium as gym -from isaaclab_assets.robots.ant import ANT_CFG - import isaaclab.sim as sim_utils from isaaclab.assets import ArticulationCfg from isaaclab.envs import DirectRLEnvCfg From 971f92158651214fb1c15736b2b5523b78e41685 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Fri, 10 Apr 2026 10:33:10 +0200 Subject: [PATCH 16/27] Set minimum skrl version to 2.0.0 --- scripts/reinforcement_learning/skrl/play.py | 2 +- scripts/reinforcement_learning/skrl/train.py | 2 +- source/isaaclab_rl/setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index 81ba27e930d0..58a24e3264f6 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -84,7 +84,7 @@ from packaging import version # check for minimum supported skrl version -SKRL_VERSION = "1.4.3" 
+SKRL_VERSION = "2.0.0" if version.parse(skrl.__version__) < version.parse(SKRL_VERSION): skrl.logger.error( f"Unsupported skrl version: {skrl.__version__}. " diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index 8aa98b6ee225..32836fa9c211 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -85,7 +85,7 @@ from packaging import version # check for minimum supported skrl version -SKRL_VERSION = "1.4.3" +SKRL_VERSION = "2.0.0" if version.parse(skrl.__version__) < version.parse(SKRL_VERSION): skrl.logger.error( f"Unsupported skrl version: {skrl.__version__}. " diff --git a/source/isaaclab_rl/setup.py b/source/isaaclab_rl/setup.py index f1cf55d3222c..d3b81f195893 100644 --- a/source/isaaclab_rl/setup.py +++ b/source/isaaclab_rl/setup.py @@ -41,7 +41,7 @@ # Extra dependencies for RL agents EXTRAS_REQUIRE = { "sb3": ["stable-baselines3>=2.6", "tqdm", "rich"], # tqdm/rich for progress bar - "skrl": ["skrl>=1.4.3"], + "skrl": ["skrl>=2.0.0"], "rl-games": [ "rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11", "gym", From 5aff1883bd0a776886a30fae75ededdf9e8294ba Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Fri, 10 Apr 2026 11:02:56 +0200 Subject: [PATCH 17/27] Add setup steps for Warp to skrl docs --- .../rl_existing_scripts.rst | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/docs/source/overview/reinforcement-learning/rl_existing_scripts.rst b/docs/source/overview/reinforcement-learning/rl_existing_scripts.rst index 9ffd47b401e2..a2a09c38d424 100644 --- a/docs/source/overview/reinforcement-learning/rl_existing_scripts.rst +++ b/docs/source/overview/reinforcement-learning/rl_existing_scripts.rst @@ -198,6 +198,47 @@ SKRL # run script for recording video of a trained agent (requires installing `ffmpeg`) ./isaaclab.sh -p scripts/reinforcement_learning/skrl/play.py --task Isaac-Reach-Franka-v0 --headless 
--ml_framework jax --video --video_length 200 + .. tab-item:: Warp + + .. tab-set:: + :sync-group: os + + .. tab-item:: :icon:`fa-brands fa-linux` Linux + :sync: linux + + .. code:: bash + + # install python module (for skrl) + ./isaaclab.sh -i skrl + # install skrl dependencies for NVIDIA Warp + ./isaaclab.sh -p -m pip install skrl["warp"] + # run script for training + ./isaaclab.sh -p scripts/reinforcement_learning/skrl/train.py --task Isaac-Reach-Franka-v0 --headless --ml_framework warp + # run script for playing with 32 environments + ./isaaclab.sh -p scripts/reinforcement_learning/skrl/play.py --task Isaac-Reach-Franka-v0 --num_envs 32 --ml_framework warp --checkpoint /PATH/TO/model.pt + # run script for playing a pre-trained checkpoint with 32 environments + ./isaaclab.sh -p scripts/reinforcement_learning/skrl/play.py --task Isaac-Reach-Franka-v0 --num_envs 32 --ml_framework warp --use_pretrained_checkpoint + # run script for recording video of a trained agent (requires installing `ffmpeg`) + ./isaaclab.sh -p scripts/reinforcement_learning/skrl/play.py --task Isaac-Reach-Franka-v0 --headless --ml_framework warp --video --video_length 200 + + .. tab-item:: :icon:`fa-brands fa-windows` Windows + :sync: windows + + .. 
code:: batch + + :: install python module (for skrl) + isaaclab.bat -i skrl + :: install skrl dependencies for NVIDIA Warp + isaaclab.bat -p -m pip install skrl["warp"] + :: run script for training + isaaclab.bat -p scripts\reinforcement_learning\skrl\train.py --task Isaac-Reach-Franka-v0 --headless --ml_framework warp + :: run script for playing with 32 environments + isaaclab.bat -p scripts\reinforcement_learning\skrl\play.py --task Isaac-Reach-Franka-v0 --num_envs 32 --ml_framework warp --checkpoint /PATH/TO/model.pt + :: run script for playing a pre-trained checkpoint with 32 environments + isaaclab.bat -p scripts\reinforcement_learning\skrl\play.py --task Isaac-Reach-Franka-v0 --num_envs 32 --ml_framework warp --use_pretrained_checkpoint + :: run script for recording video of a trained agent (requires installing `ffmpeg`) + isaaclab.bat -p scripts\reinforcement_learning\skrl\play.py --task Isaac-Reach-Franka-v0 --headless --ml_framework warp --video --video_length 200 + - Training the multi-agent environment ``Isaac-Shadow-Hand-Over-Direct-v0`` with skrl: .. 
tab-set:: From 67fe6a555a3ce2d8207a96cbc30d0b2d1b85589f Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Fri, 17 Apr 2026 13:30:34 +0200 Subject: [PATCH 18/27] Remove Warp framework --- .../rl_existing_scripts.rst | 41 ------------------- scripts/reinforcement_learning/skrl/play.py | 4 +- scripts/reinforcement_learning/skrl/train.py | 4 +- 3 files changed, 2 insertions(+), 47 deletions(-) diff --git a/docs/source/overview/reinforcement-learning/rl_existing_scripts.rst b/docs/source/overview/reinforcement-learning/rl_existing_scripts.rst index a2a09c38d424..9ffd47b401e2 100644 --- a/docs/source/overview/reinforcement-learning/rl_existing_scripts.rst +++ b/docs/source/overview/reinforcement-learning/rl_existing_scripts.rst @@ -198,47 +198,6 @@ SKRL # run script for recording video of a trained agent (requires installing `ffmpeg`) ./isaaclab.sh -p scripts/reinforcement_learning/skrl/play.py --task Isaac-Reach-Franka-v0 --headless --ml_framework jax --video --video_length 200 - .. tab-item:: Warp - - .. tab-set:: - :sync-group: os - - .. tab-item:: :icon:`fa-brands fa-linux` Linux - :sync: linux - - .. 
code:: bash - - # install python module (for skrl) - ./isaaclab.sh -i skrl - # install skrl dependencies for NVIDIA Warp - ./isaaclab.sh -p -m pip install skrl["warp"] - # run script for training - ./isaaclab.sh -p scripts/reinforcement_learning/skrl/train.py --task Isaac-Reach-Franka-v0 --headless --ml_framework warp - # run script for playing with 32 environments - ./isaaclab.sh -p scripts/reinforcement_learning/skrl/play.py --task Isaac-Reach-Franka-v0 --num_envs 32 --ml_framework warp --checkpoint /PATH/TO/model.pt - # run script for playing a pre-trained checkpoint with 32 environments - ./isaaclab.sh -p scripts/reinforcement_learning/skrl/play.py --task Isaac-Reach-Franka-v0 --num_envs 32 --ml_framework warp --use_pretrained_checkpoint - # run script for recording video of a trained agent (requires installing `ffmpeg`) - ./isaaclab.sh -p scripts/reinforcement_learning/skrl/play.py --task Isaac-Reach-Franka-v0 --headless --ml_framework warp --video --video_length 200 - - .. tab-item:: :icon:`fa-brands fa-windows` Windows - :sync: windows - - .. 
code:: batch - - :: install python module (for skrl) - isaaclab.bat -i skrl - :: install skrl dependencies for NVIDIA Warp - isaaclab.bat -p -m pip install skrl["warp"] - :: run script for training - isaaclab.bat -p scripts\reinforcement_learning\skrl\train.py --task Isaac-Reach-Franka-v0 --headless --ml_framework warp - :: run script for playing with 32 environments - isaaclab.bat -p scripts\reinforcement_learning\skrl\play.py --task Isaac-Reach-Franka-v0 --num_envs 32 --ml_framework warp --checkpoint /PATH/TO/model.pt - :: run script for playing a pre-trained checkpoint with 32 environments - isaaclab.bat -p scripts\reinforcement_learning\skrl\play.py --task Isaac-Reach-Franka-v0 --num_envs 32 --ml_framework warp --use_pretrained_checkpoint - :: run script for recording video of a trained agent (requires installing `ffmpeg`) - isaaclab.bat -p scripts\reinforcement_learning\skrl\play.py --task Isaac-Reach-Franka-v0 --headless --ml_framework warp --video --video_length 200 - - Training the multi-agent environment ``Isaac-Shadow-Hand-Over-Direct-v0`` with skrl: .. 
tab-set:: diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index 58a24e3264f6..ccfcac69680f 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -46,7 +46,7 @@ "--ml_framework", type=str, default="torch", - choices=["torch", "jax", "warp"], + choices=["torch", "jax"], help="The ML framework used for training the skrl agent.", ) parser.add_argument( @@ -96,8 +96,6 @@ from skrl.utils.runner.torch import Runner elif args_cli.ml_framework.startswith("jax"): from skrl.utils.runner.jax import Runner -elif args_cli.ml_framework.startswith("warp"): - from skrl.utils.runner.warp import Runner from isaaclab.envs import ( DirectMARLEnv, diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index 32836fa9c211..0af8a19bf2ad 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -44,7 +44,7 @@ "--ml_framework", type=str, default="torch", - choices=["torch", "jax", "warp"], + choices=["torch", "jax"], help="The ML framework used for training the skrl agent.", ) parser.add_argument( @@ -97,8 +97,6 @@ from skrl.utils.runner.torch import Runner elif args_cli.ml_framework.startswith("jax"): from skrl.utils.runner.jax import Runner -elif args_cli.ml_framework.startswith("warp"): - from skrl.utils.runner.warp import Runner from isaaclab.envs import ( DirectMARLEnv, From 9086c6721589baf4b7f54bd2277c134db3fab10a Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Fri, 17 Apr 2026 22:31:23 +0200 Subject: [PATCH 19/27] Clarify comments --- source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py index 6e3a139c3cc0..6ab8e859e723 100644 --- 
a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py @@ -26,7 +26,7 @@ class AntEnvCfg(DirectRLEnvCfg): episode_length_s = 15.0 decimation = 2 action_scale = 0.5 - action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(8,)) # bounded space (needed for random exploration) + action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(8,)) # bounded space (needed for off-policy exploration) observation_space = 36 state_space = 0 From 4c9cba045dba2643ad8da804eaeaaa6f60b1c285 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Sun, 19 Apr 2026 17:42:39 +0200 Subject: [PATCH 20/27] Revert to original task --- .../isaaclab_tasks/direct/ant/__init__.py | 3 - .../direct/ant/agents/skrl_ddpg_cfg.yaml | 99 --------------- .../direct/ant/agents/skrl_sac_cfg.yaml | 107 ---------------- .../direct/ant/agents/skrl_td3_cfg.yaml | 120 ------------------ .../isaaclab_tasks/direct/ant/ant_env.py | 4 +- 5 files changed, 1 insertion(+), 332 deletions(-) delete mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml delete mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml delete mode 100644 source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/__init__.py b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/__init__.py index 4176f73e906a..9881cd66ca74 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/__init__.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/__init__.py @@ -24,8 +24,5 @@ "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AntPPORunnerCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", - "skrl_ddpg_cfg_entry_point": f"{agents.__name__}:skrl_ddpg_cfg.yaml", - "skrl_sac_cfg_entry_point": f"{agents.__name__}:skrl_sac_cfg.yaml", -
"skrl_td3_cfg_entry_point": f"{agents.__name__}:skrl_td3_cfg.yaml", }, ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml deleted file mode 100644 index bab310c08026..000000000000 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_ddpg_cfg.yaml +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). -# All rights reserved. -# -# SPDX-License-Identifier: BSD-3-Clause - -seed: 42 - - -# Models are instantiated using skrl's model instantiator utility -# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html -models: - separate: True - policy: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: OBSERVATIONS - layers: [512, 256] - activations: relu - output: tanh(ACTIONS) - target_policy: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: OBSERVATIONS - layers: [512, 256] - activations: relu - output: tanh(ACTIONS) - critic: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: concatenate([OBSERVATIONS, ACTIONS]) - layers: [512, 256] - activations: relu - output: ONE - target_critic: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: concatenate([OBSERVATIONS, ACTIONS]) - layers: [512, 256] - activations: relu - output: ONE - - -# Replay memory -# https://skrl.readthedocs.io/en/latest/api/memories/random.html -memory: - class: RandomMemory - memory_size: 16000 - - -# DDPG agent configuration (field names are from DDPG_DEFAULT_CONFIG) -# https://skrl.readthedocs.io/en/latest/api/agents/ddpg.html -agent: - class: DDPG - gradient_steps: 1 - batch_size: 
4096 - discount_factor: 0.99 - polyak: 0.005 - learning_rate: 5.0e-4 - learning_rate_scheduler: null - learning_rate_scheduler_kwargs: null - observation_preprocessor: RunningStandardScaler - observation_preprocessor_kwargs: null - state_preprocessor: null - state_preprocessor_kwargs: null - random_timesteps: 50 - learning_starts: 50 - grad_norm_clip: 0 - exploration_noise: OrnsteinUhlenbeckNoise - exploration_noise_kwargs: - theta: 0.15 - sigma: 0.1 - base_scale: 0.5 - mean: 0.0 - std: 1.0 - exploration_scheduler: max(1 - timestep / timesteps, 0.01) - rewards_shaper: null - mixed_precision: False - # logging and checkpoint - experiment: - directory: "ant_direct" - experiment_name: "" - write_interval: auto - checkpoint_interval: auto - - -# Sequential trainer -# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html -trainer: - class: SequentialTrainer - timesteps: 160000 - environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml deleted file mode 100644 index 90697ddec035..000000000000 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_sac_cfg.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause - -seed: 42 - - -# Models are instantiated using skrl's model instantiator utility -# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html -models: - separate: True - policy: # see gaussian_model parameters - class: GaussianMixin - clip_actions: False - clip_log_std: True - min_log_std: -20.0 - max_log_std: 2.0 - initial_log_std: 0.0 - network: - - name: net - input: OBSERVATIONS - layers: [512, 256] - activations: relu - output: tanh(ACTIONS) - critic_1: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: concatenate([OBSERVATIONS, ACTIONS]) - layers: [512, 256] - activations: relu - output: ONE - critic_2: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: concatenate([OBSERVATIONS, ACTIONS]) - layers: [512, 256] - activations: relu - output: ONE - target_critic_1: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: concatenate([OBSERVATIONS, ACTIONS]) - layers: [512, 256] - activations: relu - output: ONE - target_critic_2: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: concatenate([OBSERVATIONS, ACTIONS]) - layers: [512, 256] - activations: relu - output: ONE - - -# Replay memory -# https://skrl.readthedocs.io/en/latest/api/memories/random.html -memory: - class: RandomMemory - memory_size: 16000 - - -# SAC agent configuration (field names are from SAC_DEFAULT_CONFIG) -# https://skrl.readthedocs.io/en/latest/api/agents/sac.html -agent: - class: SAC - gradient_steps: 1 - batch_size: 4096 - discount_factor: 0.99 - polyak: 0.005 - learning_rate: 5.0e-4 - learning_rate_scheduler: null - learning_rate_scheduler_kwargs: null - observation_preprocessor: RunningStandardScaler - observation_preprocessor_kwargs: null - 
state_preprocessor: null - state_preprocessor_kwargs: null - random_timesteps: 50 - learning_starts: 50 - grad_norm_clip: 0 - learn_entropy: True - initial_entropy_value: 0.2 - target_entropy: null - rewards_shaper: null - mixed_precision: False - # logging and checkpoint - experiment: - directory: "ant_direct" - experiment_name: "" - write_interval: auto - checkpoint_interval: auto - - -# Sequential trainer -# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html -trainer: - class: SequentialTrainer - timesteps: 160000 - environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml deleted file mode 100644 index 9cd0c6010c2b..000000000000 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/agents/skrl_td3_cfg.yaml +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). -# All rights reserved. 
-# -# SPDX-License-Identifier: BSD-3-Clause - -seed: 42 - - -# Models are instantiated using skrl's model instantiator utility -# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html -models: - separate: True - policy: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: OBSERVATIONS - layers: [512, 256] - activations: relu - output: tanh(ACTIONS) - target_policy: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: OBSERVATIONS - layers: [512, 256] - activations: relu - output: tanh(ACTIONS) - critic_1: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: concatenate([OBSERVATIONS, ACTIONS]) - layers: [512, 256] - activations: relu - output: ONE - critic_2: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: concatenate([OBSERVATIONS, ACTIONS]) - layers: [512, 256] - activations: relu - output: ONE - target_critic_1: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: concatenate([OBSERVATIONS, ACTIONS]) - layers: [512, 256] - activations: relu - output: ONE - target_critic_2: # see deterministic_model parameters - class: DeterministicMixin - clip_actions: False - network: - - name: net - input: concatenate([OBSERVATIONS, ACTIONS]) - layers: [512, 256] - activations: relu - output: ONE - - -# Replay memory -# https://skrl.readthedocs.io/en/latest/api/memories/random.html -memory: - class: RandomMemory - memory_size: 16000 - - -# TD3 agent configuration (field names are from TD3_DEFAULT_CONFIG) -# https://skrl.readthedocs.io/en/latest/api/agents/td3.html -agent: - class: TD3 - gradient_steps: 1 - batch_size: 4096 - discount_factor: 0.99 - polyak: 0.005 - learning_rate: 5.0e-4 - learning_rate_scheduler: null - 
learning_rate_scheduler_kwargs: null - observation_preprocessor: RunningStandardScaler - observation_preprocessor_kwargs: null - state_preprocessor: null - state_preprocessor_kwargs: null - random_timesteps: 50 - learning_starts: 50 - grad_norm_clip: 0 - exploration_noise: GaussianNoise - exploration_noise_kwargs: - mean: 0.0 - std: 0.1 - exploration_scheduler: max(1 - timestep / timesteps, 0.01) - policy_delay: 2 - smooth_regularization_noise: GaussianNoise - smooth_regularization_noise_kwargs: - mean: 0.0 - std: 0.2 - smooth_regularization_clip: 0.5 - rewards_shaper: null - mixed_precision: False - # logging and checkpoint - experiment: - directory: "ant_direct" - experiment_name: "" - write_interval: auto - checkpoint_interval: auto - - -# Sequential trainer -# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html -trainer: - class: SequentialTrainer - timesteps: 160000 - environment_info: log diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py index 6ab8e859e723..39ae57b29677 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/ant/ant_env.py @@ -5,8 +5,6 @@ from __future__ import annotations -import gymnasium as gym - import isaaclab.sim as sim_utils from isaaclab.assets import ArticulationCfg from isaaclab.envs import DirectRLEnvCfg @@ -26,7 +24,7 @@ class AntEnvCfg(DirectRLEnvCfg): episode_length_s = 15.0 decimation = 2 action_scale = 0.5 - action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(8,)) # bounded space (needed for off-policy exploration) + action_space = 8 observation_space = 36 state_space = 0 From 43a5cb2c354bab6bf3c7c9405d000bc5f7b6dd65 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Sun, 19 Apr 2026 17:55:44 +0200 Subject: [PATCH 21/27] Relax the --algorithm argument --- scripts/reinforcement_learning/skrl/play.py | 6 ++++-- scripts/reinforcement_learning/skrl/train.py 
| 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index ccfcac69680f..14b9a1fdb1ae 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -53,8 +53,10 @@ "--algorithm", type=str, default="PPO", - choices=["AMP", "DDPG", "IPPO", "MAPPO", "PPO", "SAC", "TD3"], - help="The RL algorithm used for training the skrl agent.", + help=( + "Name of the RL algorithm to use (e.g. AMP, DDPG, IPPO, MAPPO, PPO, SAC, TD3, etc.) " + "when several algorithms exist for the same task. For a more specific selection, use the argument --agent." + ), ) parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.") diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index 0af8a19bf2ad..badb4144031b 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -51,8 +51,10 @@ "--algorithm", type=str, default="PPO", - choices=["AMP", "DDPG", "IPPO", "MAPPO", "PPO", "SAC", "TD3"], - help="The RL algorithm used for training the skrl agent.", + help=( + "Name of the RL algorithm to use (e.g. AMP, DDPG, IPPO, MAPPO, PPO, SAC, TD3, etc.) " + "when several algorithms exist for the same task. For a more specific selection, use the argument --agent." + ), ) parser.add_argument( "--ray-proc-id", "-rid", type=int, default=None, help="Automatically configured by Ray integration, otherwise None." 
From dcd3d7b3ddc2b129f9cdee1482039e52748c7762 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Sun, 19 Apr 2026 21:12:59 +0200 Subject: [PATCH 22/27] Fix exception --- source/isaaclab_rl/isaaclab_rl/skrl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/isaaclab_rl/isaaclab_rl/skrl.py b/source/isaaclab_rl/isaaclab_rl/skrl.py index 7a5db146bac4..b83a644831b1 100644 --- a/source/isaaclab_rl/isaaclab_rl/skrl.py +++ b/source/isaaclab_rl/isaaclab_rl/skrl.py @@ -80,7 +80,9 @@ def SkrlVecEnvWrapper( elif ml_framework.startswith("warp"): from skrl.envs.wrappers.warp import wrap_env else: - ValueError(f"Invalid ML framework for skrl: {ml_framework}. Available options are: 'torch', 'jax', 'warp'") + raise ValueError( + f"Invalid ML framework for skrl: {ml_framework}. Available options are: 'torch', 'jax', 'warp'" + ) # wrap and return the environment return wrap_env(env, wrapper) From 47aaff73ff90745c0b0b068132db0368f09a04a0 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Mon, 20 Apr 2026 16:28:28 +0200 Subject: [PATCH 23/27] Update training performance table --- .../overview/reinforcement-learning/rl_frameworks.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/overview/reinforcement-learning/rl_frameworks.rst b/docs/source/overview/reinforcement-learning/rl_frameworks.rst index 5f9d25e06e05..1f5e63d75b7b 100644 --- a/docs/source/overview/reinforcement-learning/rl_frameworks.rst +++ b/docs/source/overview/reinforcement-learning/rl_frameworks.rst @@ -71,19 +71,19 @@ Training Performance -------------------- We performed training with each RL library on the same ``Isaac-Humanoid-v0`` environment -with ``--headless`` on a single NVIDIA GeForce RTX 4090 and logged the total training time +with ``--headless`` on a single NVIDIA GeForce RTX 5090 and logged the total training time for 65.5M steps (4096 environments x 32 rollout steps x 500 iterations). 
+--------------------+-----------------+ | RL Library | Time in seconds | +====================+=================+ -| RL-Games | 201 | +| RL-Games | 212 | +--------------------+-----------------+ -| SKRL | 201 | +| SKRL | 210 | +--------------------+-----------------+ -| RSL RL | 198 | +| RSL RL | 233 | +--------------------+-----------------+ -| Stable-Baselines3 | 287 | +| Stable-Baselines3 | 306 | +--------------------+-----------------+ Training commands (check for the *'Training time: XXX seconds'* line in the terminal output): From 2052eac2a7e3ab2dd9863adf73e5b2646759ccc4 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Tue, 21 Apr 2026 15:31:52 +0200 Subject: [PATCH 24/27] Revert Training Performance table changes --- .../overview/reinforcement-learning/rl_frameworks.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/overview/reinforcement-learning/rl_frameworks.rst b/docs/source/overview/reinforcement-learning/rl_frameworks.rst index 1f5e63d75b7b..5f9d25e06e05 100644 --- a/docs/source/overview/reinforcement-learning/rl_frameworks.rst +++ b/docs/source/overview/reinforcement-learning/rl_frameworks.rst @@ -71,19 +71,19 @@ Training Performance -------------------- We performed training with each RL library on the same ``Isaac-Humanoid-v0`` environment -with ``--headless`` on a single NVIDIA GeForce RTX 5090 and logged the total training time +with ``--headless`` on a single NVIDIA GeForce RTX 4090 and logged the total training time for 65.5M steps (4096 environments x 32 rollout steps x 500 iterations). 
+--------------------+-----------------+ | RL Library | Time in seconds | +====================+=================+ -| RL-Games | 212 | +| RL-Games | 201 | +--------------------+-----------------+ -| SKRL | 210 | +| SKRL | 201 | +--------------------+-----------------+ -| RSL RL | 233 | +| RSL RL | 198 | +--------------------+-----------------+ -| Stable-Baselines3 | 306 | +| Stable-Baselines3 | 287 | +--------------------+-----------------+ Training commands (check for the *'Training time: XXX seconds'* line in the terminal output): From b366aa0fa06a1c3e8d62143a9ef580f26b28f301 Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Tue, 21 Apr 2026 15:36:16 +0200 Subject: [PATCH 25/27] Update version and CHANGELOG --- source/isaaclab_rl/config/extension.toml | 2 +- source/isaaclab_rl/docs/CHANGELOG.rst | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/source/isaaclab_rl/config/extension.toml b/source/isaaclab_rl/config/extension.toml index 35ce26490606..6b5ae668f03e 100644 --- a/source/isaaclab_rl/config/extension.toml +++ b/source/isaaclab_rl/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.5.0" +version = "0.5.1" # Description title = "Isaac Lab RL" diff --git a/source/isaaclab_rl/docs/CHANGELOG.rst b/source/isaaclab_rl/docs/CHANGELOG.rst index 9666e7214e81..104c6e941fd8 100644 --- a/source/isaaclab_rl/docs/CHANGELOG.rst +++ b/source/isaaclab_rl/docs/CHANGELOG.rst @@ -1,6 +1,13 @@ Changelog --------- +0.5.1 (2026-04-21) +~~~~~~~~~~~~~~~~~~ + +Changed +^^^^^^^ +* Updated SKRL wrapper to support the new version of SKRL 2.0. 
+ 0.5.0 (2026-3-04) ~~~~~~~~~~~~~~~~~~ From e0017ceb1cb113690584eb3ba94ec8163ed6e40a Mon Sep 17 00:00:00 2001 From: Antonio Serrano Munoz Date: Tue, 21 Apr 2026 15:40:18 +0200 Subject: [PATCH 26/27] Update version and CHANGELOG --- source/isaaclab_rl/docs/CHANGELOG.rst | 3 ++- source/isaaclab_tasks/config/extension.toml | 2 +- source/isaaclab_tasks/docs/CHANGELOG.rst | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/source/isaaclab_rl/docs/CHANGELOG.rst b/source/isaaclab_rl/docs/CHANGELOG.rst index 104c6e941fd8..cf903b02d832 100644 --- a/source/isaaclab_rl/docs/CHANGELOG.rst +++ b/source/isaaclab_rl/docs/CHANGELOG.rst @@ -6,7 +6,8 @@ Changelog Changed ^^^^^^^ -* Updated SKRL wrapper to support the new version of SKRL 2.0. + +* Updated skrl wrapper to support the new version of skrl 2.0. 0.5.0 (2026-3-04) ~~~~~~~~~~~~~~~~~~ diff --git a/source/isaaclab_tasks/config/extension.toml b/source/isaaclab_tasks/config/extension.toml index 80335e4cf123..6b1140818ee3 100644 --- a/source/isaaclab_tasks/config/extension.toml +++ b/source/isaaclab_tasks/config/extension.toml @@ -1,7 +1,7 @@ [package] # Note: Semantic Versioning is used: https://semver.org/ -version = "0.11.14" +version = "0.11.16" # Description title = "Isaac Lab Environments" diff --git a/source/isaaclab_tasks/docs/CHANGELOG.rst b/source/isaaclab_tasks/docs/CHANGELOG.rst index 896ed0be4255..11e2dc988aca 100644 --- a/source/isaaclab_tasks/docs/CHANGELOG.rst +++ b/source/isaaclab_tasks/docs/CHANGELOG.rst @@ -1,6 +1,14 @@ Changelog --------- +0.11.16 (2026-04-21) +~~~~~~~~~~~~~~~~~~ + +Changed +^^^^^^^ + +* Updated some agents' configuration files for the skrl library to support the new version of skrl 2.0. 
+ 0.11.15 (2026-03-07) ~~~~~~~~~~~~~~~~~~~~ From daaa43dda22ef639f091389c8fcde9b23b3bddbe Mon Sep 17 00:00:00 2001 From: Kelly Guo Date: Wed, 22 Apr 2026 17:33:41 -0700 Subject: [PATCH 27/27] Apply suggestion from @kellyguo11 Signed-off-by: Kelly Guo --- source/isaaclab_tasks/docs/CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/isaaclab_tasks/docs/CHANGELOG.rst b/source/isaaclab_tasks/docs/CHANGELOG.rst index 11e2dc988aca..8bb095bc372a 100644 --- a/source/isaaclab_tasks/docs/CHANGELOG.rst +++ b/source/isaaclab_tasks/docs/CHANGELOG.rst @@ -2,7 +2,7 @@ Changelog --------- 0.11.16 (2026-04-21) -~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~ Changed ^^^^^^^