Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
09ee5b6
Add DDPG, SAC and TD3 agents for the Ant-Direct task
Toni-SM Sep 16, 2025
c04d746
Fix algorithm definition when specifying the --agent argument
Toni-SM Sep 16, 2025
beb5b0c
Replace STATES by OBSERVATIONS when defining model's inputs
Toni-SM Sep 16, 2025
7353ff8
Merge branch 'main' into toni/skrl_2.0.0
Toni-SM Sep 30, 2025
758d9f5
Merge branch 'main' into toni/skrl_2.0.0
Toni-SM Oct 16, 2025
6bb1e5d
Update AMP agent config
Toni-SM Oct 16, 2025
d6b22ce
Update multi-agent config
Toni-SM Oct 19, 2025
8261a63
Update off-policy agents config
Toni-SM Oct 19, 2025
b966ecc
Add DDPG, SAC and TD3 choices to argparse
Toni-SM Oct 19, 2025
414beaa
Normalizes line endings for docs/make.bat
Toni-SM Oct 19, 2025
fbe0f44
Merge branch 'main' into toni/skrl_2.0.0
Toni-SM Oct 19, 2025
f58a019
Update DDPG exploration noise arguments
Toni-SM Oct 19, 2025
48041e0
Merge branch 'main' into toni/skrl_2.0.0
Toni-SM Nov 8, 2025
406ed22
Add Warp implementation
Toni-SM Nov 13, 2025
4e2885d
Merge branch 'toni/skrl_2.0.0' of github.com:Toni-SM/IsaacLab into to…
Toni-SM Nov 13, 2025
b528010
Merge branch 'main' into toni/skrl_2.0.0
Toni-SM Nov 27, 2025
5e73a9d
Merge branch 'main' into toni/skrl_2.0.0
Toni-SM Dec 3, 2025
817a693
Remove jax-numpy backend
Toni-SM Dec 3, 2025
60834ea
Import warp runner in play.py script
Toni-SM Dec 3, 2025
8c899e2
Apply pre-commit
Toni-SM Dec 3, 2025
2a865f1
Update play.py script
Toni-SM Dec 3, 2025
ee1de07
Merge branch 'main' into toni/skrl_2.0.0
Toni-SM Mar 27, 2026
259e023
Apply format
Toni-SM Mar 27, 2026
f317be3
Merge branch 'main' into toni/skrl_2.0.0
Toni-SM Apr 9, 2026
971f921
Set minimum skrl version to 2.0.0
Toni-SM Apr 10, 2026
5aff188
Add setup steps for Warp to skrl docs
Toni-SM Apr 10, 2026
67fe6a5
Remove Warp framework
Toni-SM Apr 17, 2026
9086c67
Clarify comments
Toni-SM Apr 17, 2026
4c9cba0
Revert to original task
Toni-SM Apr 19, 2026
43a5cb2
Relax the --algorithm argument
Toni-SM Apr 19, 2026
dcd3d7b
Fix exception
Toni-SM Apr 19, 2026
47aaff7
Update training performance table
Toni-SM Apr 20, 2026
2052eac
Revert Training Performance table changes
Toni-SM Apr 21, 2026
b366aa0
Update version and CHANGELOG
Toni-SM Apr 21, 2026
e0017ce
Update version and CHANGELOG
Toni-SM Apr 21, 2026
daaa43d
Apply suggestion from @kellyguo11
kellyguo11 Apr 23, 2026
6849141
Merge branch 'main' into toni/skrl_2.0.0_part1
kellyguo11 Apr 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions scripts/reinforcement_learning/skrl/play.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,17 @@
"--ml_framework",
type=str,
default="torch",
choices=["torch", "jax", "jax-numpy"],
choices=["torch", "jax"],
help="The ML framework used for training the skrl agent.",
)
Comment thread
Toni-SM marked this conversation as resolved.
parser.add_argument(
"--algorithm",
type=str,
default="PPO",
choices=["AMP", "PPO", "IPPO", "MAPPO"],
help="The RL algorithm used for training the skrl agent.",
help=(
"Name of the RL algorithm to use (e.g. AMP, DDPG, IPPO, MAPPO, PPO, SAC, TD3, etc.) "
"when several algorithms exist for the same task. For a more specific selection, use the argument --agent."
),
)
parser.add_argument("--real-time", action="store_true", default=False, help="Run in real-time, if possible.")

Expand Down Expand Up @@ -84,7 +86,7 @@
from packaging import version

# check for minimum supported skrl version
SKRL_VERSION = "1.4.3"
SKRL_VERSION = "2.0.0"
if version.parse(skrl.__version__) < version.parse(SKRL_VERSION):
skrl.logger.error(
f"Unsupported skrl version: {skrl.__version__}. "
Expand Down Expand Up @@ -207,10 +209,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, expe
print(f"[INFO] Loading model checkpoint from: {resume_path}")
runner.agent.load(resume_path)
# set agent to evaluation mode
runner.agent.set_running_mode("eval")
runner.agent.enable_training_mode(False, apply_to_models=True)

# reset environment
obs, _ = env.reset()
states = env.state()
timestep = 0
# simulate environment
while simulation_app.is_running():
Expand All @@ -219,7 +222,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, expe
# run everything in inference mode
with torch.inference_mode():
# agent stepping
outputs = runner.agent.act(obs, timestep=0, timesteps=0)
outputs = runner.agent.act(obs, states, timestep=0, timesteps=0)
# - multi-agent (deterministic) actions
if hasattr(env, "possible_agents"):
actions = {a: outputs[-1][a].get("mean_actions", outputs[0][a]) for a in env.possible_agents}
Expand All @@ -228,6 +231,7 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, expe
actions = outputs[-1].get("mean_actions", outputs[0])
# env stepping
obs, _, _, _, _ = env.step(actions)
states = env.state()
if args_cli.video:
timestep += 1
# exit the play loop after recording one video
Expand Down
10 changes: 6 additions & 4 deletions scripts/reinforcement_learning/skrl/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,17 @@
"--ml_framework",
type=str,
default="torch",
choices=["torch", "jax", "jax-numpy"],
choices=["torch", "jax"],
help="The ML framework used for training the skrl agent.",
)
parser.add_argument(
"--algorithm",
type=str,
default="PPO",
choices=["AMP", "PPO", "IPPO", "MAPPO"],
help="The RL algorithm used for training the skrl agent.",
help=(
"Name of the RL algorithm to use (e.g. AMP, DDPG, IPPO, MAPPO, PPO, SAC, TD3, etc.) "
"when several algorithms exist for the same task. For a more specific selection, use the argument --agent."
),
)
parser.add_argument(
"--ray-proc-id", "-rid", type=int, default=None, help="Automatically configured by Ray integration, otherwise None."
Expand Down Expand Up @@ -85,7 +87,7 @@
from packaging import version

# check for minimum supported skrl version
SKRL_VERSION = "1.4.3"
SKRL_VERSION = "2.0.0"
if version.parse(skrl.__version__) < version.parse(SKRL_VERSION):
skrl.logger.error(
f"Unsupported skrl version: {skrl.__version__}. "
Expand Down
2 changes: 1 addition & 1 deletion source/isaaclab_rl/config/extension.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.5.0"
version = "0.5.1"

# Description
title = "Isaac Lab RL"
Expand Down
8 changes: 8 additions & 0 deletions source/isaaclab_rl/docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Changelog
---------

0.5.1 (2026-04-21)
~~~~~~~~~~~~~~~~~~

Changed
^^^^^^^

* Updated the skrl wrapper to support skrl 2.0.

0.5.0 (2026-03-04)
~~~~~~~~~~~~~~~~~~

Expand Down
8 changes: 5 additions & 3 deletions source/isaaclab_rl/isaaclab_rl/skrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

def SkrlVecEnvWrapper(
env: ManagerBasedRLEnv | DirectRLEnv | DirectMARLEnv,
ml_framework: Literal["torch", "jax", "jax-numpy"] = "torch",
ml_framework: Literal["torch", "jax", "warp"] = "torch",
wrapper: Literal["auto", "isaaclab", "isaaclab-single-agent", "isaaclab-multi-agent"] = "isaaclab",
):
"""Wraps around Isaac Lab environment for skrl.
Expand Down Expand Up @@ -77,9 +77,11 @@ def SkrlVecEnvWrapper(
from skrl.envs.wrappers.torch import wrap_env
elif ml_framework.startswith("jax"):
from skrl.envs.wrappers.jax import wrap_env
elif ml_framework.startswith("warp"):
from skrl.envs.wrappers.warp import wrap_env
else:
ValueError(
f"Invalid ML framework for skrl: {ml_framework}. Available options are: 'torch', 'jax' or 'jax-numpy'"
raise ValueError(
Comment thread
Toni-SM marked this conversation as resolved.
f"Invalid ML framework for skrl: {ml_framework}. Available options are: 'torch', 'jax', 'warp'"
)

# wrap and return the environment
Expand Down
2 changes: 1 addition & 1 deletion source/isaaclab_rl/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
# Extra dependencies for RL agents
EXTRAS_REQUIRE = {
"sb3": ["stable-baselines3>=2.6", "tqdm", "rich"], # tqdm/rich for progress bar
"skrl": ["skrl>=1.4.3"],
"skrl": ["skrl>=2.0.0"],
"rl-games": [
"rl-games @ git+https://github.com/isaac-sim/rl_games.git@python3.11",
"gym",
Expand Down
2 changes: 1 addition & 1 deletion source/isaaclab_tasks/config/extension.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.11.14"
version = "0.11.16"

# Description
title = "Isaac Lab Environments"
Expand Down
8 changes: 8 additions & 0 deletions source/isaaclab_tasks/docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Changelog
---------

0.11.16 (2026-04-21)
~~~~~~~~~~~~~~~~~~~~

Changed
^^^^^^^

* Updated some of the skrl agent configuration files to support skrl 2.0.

0.11.15 (2026-03-07)
~~~~~~~~~~~~~~~~~~~~

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ agent:
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
state_preprocessor: RunningStandardScaler
observation_preprocessor: RunningStandardScaler
Comment thread
Toni-SM marked this conversation as resolved.
observation_preprocessor_kwargs: null
state_preprocessor: null
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ models:
clip_actions: False
network:
- name: net
input: OBSERVATIONS
input: STATES
layers: [32, 32]
activations: elu
output: ONE
Expand All @@ -54,10 +54,10 @@ agent:
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.008
observation_preprocessor: RunningStandardScaler
observation_preprocessor_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,14 @@ agent:
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
observation_preprocessor: RunningStandardScaler
observation_preprocessor_kwargs: null
state_preprocessor: null
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
amp_observation_preprocessor: RunningStandardScaler
amp_observation_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
Expand All @@ -91,10 +93,9 @@ agent:
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
task_reward_scale: 0.0
style_reward_scale: 2.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,14 @@ agent:
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
observation_preprocessor: RunningStandardScaler
observation_preprocessor_kwargs: null
state_preprocessor: null
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
amp_observation_preprocessor: RunningStandardScaler
amp_observation_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
Expand All @@ -91,10 +93,9 @@ agent:
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
task_reward_scale: 0.0
style_reward_scale: 2.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,14 @@ agent:
learning_rate: 5.0e-05
learning_rate_scheduler: null
learning_rate_scheduler_kwargs: null
state_preprocessor: RunningStandardScaler
observation_preprocessor: RunningStandardScaler
observation_preprocessor_kwargs: null
state_preprocessor: null
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
amp_state_preprocessor: RunningStandardScaler
amp_state_preprocessor_kwargs: null
amp_observation_preprocessor: RunningStandardScaler
amp_observation_preprocessor_kwargs: null
random_timesteps: 0
learning_starts: 0
grad_norm_clip: 0.0
Expand All @@ -91,10 +93,9 @@ agent:
value_loss_scale: 2.5
discriminator_loss_scale: 5.0
amp_batch_size: 512
task_reward_weight: 0.0
style_reward_weight: 1.0
task_reward_scale: 0.0
style_reward_scale: 2.0
discriminator_batch_size: 4096
discriminator_reward_scale: 2.0
discriminator_logit_regularization_scale: 0.05
discriminator_gradient_penalty_scale: 5.0
discriminator_weight_decay_scale: 1.0e-04
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ agent:
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.016
state_preprocessor: RunningStandardScaler
observation_preprocessor: RunningStandardScaler
observation_preprocessor_kwargs: null
state_preprocessor: null
state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ models:
clip_actions: False
network:
- name: net
input: OBSERVATIONS
input: STATES
layers: [512, 512, 256, 128]
activations: elu
output: ONE
Expand All @@ -54,10 +54,10 @@ agent:
learning_rate_scheduler: KLAdaptiveLR
learning_rate_scheduler_kwargs:
kl_threshold: 0.016
observation_preprocessor: RunningStandardScaler
observation_preprocessor_kwargs: null
state_preprocessor: RunningStandardScaler
state_preprocessor_kwargs: null
shared_state_preprocessor: RunningStandardScaler
shared_state_preprocessor_kwargs: null
value_preprocessor: RunningStandardScaler
value_preprocessor_kwargs: null
random_timesteps: 0
Expand Down
Loading