Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ Guidelines for modifications:
* HoJin Jeon
* Hongwei Xiong
* Hongyu Li
* Hui Kang
* Iretiayo Akinola
* Jack Zeng
* Jan Kerner
Expand All @@ -65,6 +66,7 @@ Guidelines for modifications:
* Jingzhou Liu
* Johnson Sun
* Kaixi Bao
* Kingsley Liu
* Kourosh Darvish
* Lionel Gulich
* Louis Le Lay
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
18 changes: 13 additions & 5 deletions docs/source/overview/environments.rst
Original file line number Diff line number Diff line change
Expand Up @@ -289,15 +289,19 @@ Navigation
.. table::
:widths: 33 37 30

+----------------+---------------------+-----------------------------------------------------------------------------+
| World | Environment ID | Description |
+================+=====================+=============================================================================+
| |anymal_c_nav| | |anymal_c_nav-link| | Navigate towards a target x-y position and heading with the ANYmal C robot. |
+----------------+---------------------+-----------------------------------------------------------------------------+
+-------------------------+------------------------------+---------------------------------------------------------------------------------------------------------+
| World | Environment ID | Description |
+=========================+==============================+=========================================================================================================+
| |anymal_c_nav| | |anymal_c_nav-link| | Navigate towards a target x-y position and heading with the ANYmal C robot. |
+-------------------------+------------------------------+---------------------------------------------------------------------------------------------------------+
| |anymal_c_nav_obstacle| | |anymal_c_nav_obstacle-link| | Navigate towards a target x-y position and heading with the ANYmal C robot in a scene with an obstacle. |
+-------------------------+------------------------------+---------------------------------------------------------------------------------------------------------+

.. |anymal_c_nav-link| replace:: `Isaac-Navigation-Flat-Anymal-C-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/navigation_env_cfg.py>`__
.. |anymal_c_nav_obstacle-link| replace:: `Isaac-Navigation-Flat-Obstacle-Anymal-C-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/navigation_obstacle_env_cfg.py>`__

.. |anymal_c_nav| image:: ../_static/tasks/navigation/anymal_c_nav.jpg
.. |anymal_c_nav_obstacle| image:: ../_static/tasks/navigation/anymal_c_nav_obstacle.jpg


Others
Expand Down Expand Up @@ -492,6 +496,10 @@ Comprehensive List of Environments
- Isaac-Navigation-Flat-Anymal-C-Play-v0
- Manager Based
- **rsl_rl** (PPO), **skrl** (PPO)
* - Isaac-Navigation-Flat-Obstacle-Anymal-C-v0
- Isaac-Navigation-Flat-Obstacle-Anymal-C-Play-v0
- Manager Based
- **rsl_rl** (PPO)
* - Isaac-Open-Drawer-Franka-IK-Abs-v0
-
- Manager Based
Expand Down
17 changes: 13 additions & 4 deletions scripts/reinforcement_learning/rsl_rl/play.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
import time
import torch

from rsl_rl.runners import OnPolicyRunner
from rsl_rl.runners import OnPolicyRunner, OnPolicyRunnerConv2d

from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent
from isaaclab.utils.assets import retrieve_file_path
Expand Down Expand Up @@ -110,7 +110,10 @@ def main():

print(f"[INFO]: Loading model checkpoint from: {resume_path}")
# load previously trained model
ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
if agent_cfg.policy.class_name == "ActorCriticConv2d":
ppo_runner = OnPolicyRunnerConv2d(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
else:
ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
ppo_runner.load(resume_path)

# obtain the trained policy for inference
Expand All @@ -128,7 +131,10 @@ def main():
dt = env.unwrapped.physics_dt

# reset environment
obs, _ = env.get_observations()
obs, extras = env.get_observations()
if "sensor" in extras["observations"]:
image_obs = extras["observations"]["sensor"].permute(0, 3, 1, 2).flatten(start_dim=1)
obs = torch.cat([obs, image_obs], dim=1)
timestep = 0
# simulate environment
while simulation_app.is_running():
Expand All @@ -138,7 +144,10 @@ def main():
# agent stepping
actions = policy(obs)
# env stepping
obs, _, _, _ = env.step(actions)
obs, _, _, infos = env.step(actions)
if "sensor" in infos["observations"]:
image_obs = infos["observations"]["sensor"].permute(0, 3, 1, 2).flatten(start_dim=1)
obs = torch.cat([obs, image_obs], dim=1)
if args_cli.video:
timestep += 1
# Exit the play loop after recording one video
Expand Down
7 changes: 5 additions & 2 deletions scripts/reinforcement_learning/rsl_rl/train.py
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for creating the PR. How to edit the code in train.py and play.py will probably have to wait until CNN support becomes available in the rsl_rl code. In the ideal case, CNNs will be supported naturally without any modification to train.py and play.py XD

Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
import torch
from datetime import datetime

from rsl_rl.runners import OnPolicyRunner
from rsl_rl.runners import OnPolicyRunner, OnPolicyRunnerConv2d

from isaaclab.envs import (
DirectMARLEnv,
Expand Down Expand Up @@ -127,7 +127,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
env = RslRlVecEnvWrapper(env)

# create runner from rsl-rl
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
if agent_cfg.policy.class_name == "ActorCriticConv2d":
runner = OnPolicyRunnerConv2d(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
else:
runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
# write git state to logs
runner.add_git_repo_to_log(__file__)
# load the checkpoint
Expand Down
2 changes: 1 addition & 1 deletion source/isaaclab/config/extension.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.34.9"
version = "0.34.10"

# Description
title = "Isaac Lab framework for Robot Learning"
Expand Down
10 changes: 10 additions & 0 deletions source/isaaclab/docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
Changelog
---------

0.34.10 (2025-03-06)
~~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added the pose2d command that generates the target position based on the position of the obstacle object.
* Added the reset event that samples a position in a sector of a ring area defined by the radius and angle ranges.


0.34.9 (2025-03-04)
~~~~~~~~~~~~~~~~~~~

Expand Down
3 changes: 2 additions & 1 deletion source/isaaclab/isaaclab/envs/mdp/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
from .commands_cfg import (
NormalVelocityCommandCfg,
NullCommandCfg,
ObstaclePose2dCommandCfg,
TerrainBasedPose2dCommandCfg,
UniformPose2dCommandCfg,
UniformPoseCommandCfg,
UniformVelocityCommandCfg,
)
from .null_command import NullCommand
from .pose_2d_command import TerrainBasedPose2dCommand, UniformPose2dCommand
from .pose_2d_command import ObstaclePose2dCommand, TerrainBasedPose2dCommand, UniformPose2dCommand
from .pose_command import UniformPoseCommand
from .velocity_command import NormalVelocityCommand, UniformVelocityCommand
25 changes: 24 additions & 1 deletion source/isaaclab/isaaclab/envs/mdp/commands/commands_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from isaaclab.utils import configclass

from .null_command import NullCommand
from .pose_2d_command import TerrainBasedPose2dCommand, UniformPose2dCommand
from .pose_2d_command import ObstaclePose2dCommand, TerrainBasedPose2dCommand, UniformPose2dCommand
from .pose_command import UniformPoseCommand
from .velocity_command import NormalVelocityCommand, UniformVelocityCommand

Expand Down Expand Up @@ -228,6 +228,29 @@ class Ranges:
goal_pose_visualizer_cfg.markers["arrow"].scale = (0.2, 0.2, 0.8)


@configclass
class ObstaclePose2dCommandCfg(UniformPose2dCommandCfg):
    """Configuration for the obstacle-based 2D-pose command generator.

    The command term built from this configuration (:class:`ObstaclePose2dCommand`) computes the
    target position from the robot and obstacle root positions instead of sampling it, so the
    only sampling range declared here is the heading range.
    """

    # command term class instantiated from this configuration
    class_type: type = ObstaclePose2dCommand

    # must be set by the user; used as a key into the environment's scene to fetch the obstacle
    object_name: str = MISSING
    """Name of the obstacle object in the environment."""

    # NOTE(review): this nested class shadows any ``Ranges`` declared on the parent configuration
    # and exposes only ``heading`` -- confirm that no inherited code still reads position ranges.
    @configclass
    class Ranges:
        """Uniform distribution ranges for the heading commands."""

        heading: tuple[float, float] = MISSING
        """Heading range for the position commands (in rad).

        Used only if :attr:`simple_heading` is False.
        """

    ranges: Ranges = MISSING
    """Distribution ranges for the sampled commands."""


@configclass
class TerrainBasedPose2dCommandCfg(UniformPose2dCommandCfg):
"""Configuration for the terrain-based position command generator."""
Expand Down
64 changes: 62 additions & 2 deletions source/isaaclab/isaaclab/envs/mdp/commands/pose_2d_command.py
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the goal position sampled from the obstacle's root_pos? What if
2 * self.object.data.root_pos_w[env_ids, 0] - self.robot.data.root_pos_w[env_ids, 0] is within the obstacle mesh, or outside the terrain boundary when using a non-flat terrain (e.g. the terrain generator)? Does it also work if there are multiple objects? How do you guarantee that the resulting goal position is not ill-formed if the terrains and objects have been modified? If the command term is not general enough, it is recommended to implement it in the task-level mdp rather than in env.mdp. If you want to write it in env.mdp, we may need to better formulate the command sampling strategy.

Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from collections.abc import Sequence
from typing import TYPE_CHECKING

from isaaclab.assets import Articulation
from isaaclab.assets import Articulation, RigidObject
from isaaclab.managers import CommandTerm
from isaaclab.markers import VisualizationMarkers
from isaaclab.terrains import TerrainImporter
Expand All @@ -20,7 +20,7 @@
if TYPE_CHECKING:
from isaaclab.envs import ManagerBasedEnv

from .commands_cfg import TerrainBasedPose2dCommandCfg, UniformPose2dCommandCfg
from .commands_cfg import ObstaclePose2dCommandCfg, TerrainBasedPose2dCommandCfg, UniformPose2dCommandCfg


class UniformPose2dCommand(CommandTerm):
Expand Down Expand Up @@ -143,6 +143,66 @@ def _debug_vis_callback(self, event):
)


class ObstaclePose2dCommand(UniformPose2dCommand):
    """Command generator that generates 2D-pose commands relative to an obstacle.

    The x-y target is the point reflection of the robot's current root position through the
    obstacle's root position, i.e. ``target_xy = 2 * obstacle_xy - robot_xy``. The obstacle
    therefore sits midway on the straight line between the robot and its target.

    The heading commands are either aligned with the robot-to-target line or are sampled uniformly.
    This can be configured through the :attr:`ObstaclePose2dCommandCfg.simple_heading` parameter in
    the configuration.

    .. note::
        No check is performed that the resulting target is collision-free or lies inside the
        terrain; only a single obstacle (``cfg.object_name``) is taken into account.
    """

    cfg: ObstaclePose2dCommandCfg
    """Configuration for the command generator."""

    def __init__(self, cfg: ObstaclePose2dCommandCfg, env: ManagerBasedEnv):
        """Initialize the command generator class.

        Args:
            cfg: The configuration parameters for the command generator.
            env: The environment object.
        """
        # initialize the base class
        super().__init__(cfg, env)

        # obtain the obstacle object
        self.object: RigidObject = env.scene[cfg.object_name]

    def _resample_command(self, env_ids: Sequence[int]):
        """Resample the position and heading commands for the given environments.

        Args:
            env_ids: Indices of the environments whose commands are resampled.
        """
        robot_pos_w = self.robot.data.root_pos_w[env_ids]
        obstacle_pos_w = self.object.data.root_pos_w[env_ids]

        # start from the env origins so that the target height is expressed in the world frame
        self.pos_command_w[env_ids] = self._env.scene.env_origins[env_ids]
        # x-y target: mirror the robot position through the obstacle position
        self.pos_command_w[env_ids, :2] = 2.0 * obstacle_pos_w[:, :2] - robot_pos_w[:, :2]
        # height: default root height on top of the env origin height
        # (previously assigned with ``=``, which discarded the env origin height written above)
        self.pos_command_w[env_ids, 2] += self.robot.data.default_root_state[env_ids, 2]

        if self.cfg.simple_heading:
            # set heading command to point towards target
            target_vec = self.pos_command_w[env_ids] - robot_pos_w
            target_direction = torch.atan2(target_vec[:, 1], target_vec[:, 0])
            flipped_target_direction = wrap_to_pi(target_direction + torch.pi)

            # compute errors to find the closest direction to the current heading
            # this is done to avoid the discontinuity at the -pi/pi boundary
            heading_w = self.robot.data.heading_w[env_ids]
            curr_to_target = wrap_to_pi(target_direction - heading_w).abs()
            curr_to_flipped_target = wrap_to_pi(flipped_target_direction - heading_w).abs()

            # set the heading command to the closest direction
            self.heading_command_w[env_ids] = torch.where(
                curr_to_target < curr_to_flipped_target,
                target_direction,
                flipped_target_direction,
            )
        else:
            # random heading command; the scratch tensor is only needed in this branch
            r = torch.empty(len(env_ids), device=self.device)
            self.heading_command_w[env_ids] = r.uniform_(*self.cfg.ranges.heading)


class TerrainBasedPose2dCommand(UniformPose2dCommand):
"""Command generator that generates pose commands based on the terrain.

Expand Down
39 changes: 39 additions & 0 deletions source/isaaclab/isaaclab/envs/mdp/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,45 @@ def reset_root_state_uniform(
asset.write_root_velocity_to_sim(velocities, env_ids=env_ids)


def reset_root_state_uniform_angular(
    env: ManagerBasedEnv,
    env_ids: torch.Tensor,
    radius_range: tuple[float, float],
    angle_range: tuple[float, float],
    asset_cfg: SceneEntityCfg = SceneEntityCfg("object"),
):
    """Reset the asset root position to a random point inside a ring sector.

    The sector is defined in polar coordinates around the asset's default root position
    (expressed relative to the environment origin).

    * It samples a radius and an angle uniformly from the given ranges, converts them to an x-y
      offset and adds that offset to the default root position and the environment origin, before
      setting the pose into the physics simulation.
    * The height is the default root height plus the environment origin height.
    * The orientation is reset to the default root orientation.
    * The root velocity is not written by this function.

    The function takes tuples of the form ``(min, max)`` for the radius and angle ranges. Angles are
    in radians. Radius and angle are sampled independently, so samples are uniform in
    ``(radius, angle)`` and not uniform over the area of the sector (points are denser at small radii).

    Args:
        env: The environment instance.
        env_ids: Indices of the environments to reset.
        radius_range: ``(min, max)`` radial distance from the default root position.
        angle_range: ``(min, max)`` polar angle in radians, measured from the x-axis.
        asset_cfg: The scene entity to reset. Defaults to the entity named ``"object"``.
    """
    # extract the used quantities (to enable type-hinting)
    asset: RigidObject | Articulation = env.scene[asset_cfg.name]
    # get default root state
    root_states = asset.data.default_root_state[env_ids].clone()

    # sample polar coordinates: column 0 is the radius, column 1 is the angle
    ranges = torch.tensor([radius_range, angle_range], device=asset.device)
    rand_samples = math_utils.sample_uniform(ranges[:, 0], ranges[:, 1], (len(env_ids), 2), device=asset.device)
    radius, angle = rand_samples[:, 0], rand_samples[:, 1]

    # positions: offset the full 3D default position by the env origin so that the height is also
    # correct when the env origin is not at z = 0 (previously only x and y were offset)
    positions = root_states[:, 0:3] + env.scene.env_origins[env_ids]
    positions[:, 0] += radius * torch.cos(angle)
    positions[:, 1] += radius * torch.sin(angle)
    # orientations: default root quaternion
    orientations = root_states[:, 3:7]

    # set into the physics simulation
    asset.write_root_pose_to_sim(torch.cat([positions, orientations], dim=-1), env_ids=env_ids)


def reset_root_state_with_random_orientation(
env: ManagerBasedEnv,
env_ids: torch.Tensor,
Expand Down
2 changes: 1 addition & 1 deletion source/isaaclab_rl/config/extension.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

# Note: Semantic Versioning is used: https://semver.org/
version = "0.1.0"
version = "0.1.1"

# Description
title = "Isaac Lab RL"
Expand Down
9 changes: 9 additions & 0 deletions source/isaaclab_rl/docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
Changelog
---------

0.1.1 (2025-03-06)
~~~~~~~~~~~~~~~~~~~~

Added
^^^^^

* Added configuration for RSL RL actor-critic networks with convolutional layers.


0.1.0 (2024-12-27)
~~~~~~~~~~~~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion source/isaaclab_rl/isaaclab_rl/rsl_rl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@
"""

from .exporter import export_policy_as_jit, export_policy_as_onnx
from .rl_cfg import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
from .rl_cfg import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoActorCriticConv2dCfg, RslRlPpoAlgorithmCfg
from .vecenv_wrapper import RslRlVecEnvWrapper
5 changes: 4 additions & 1 deletion source/isaaclab_rl/isaaclab_rl/rsl_rl/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ def export(self, path, filename):
dynamic_axes={},
)
else:
obs = torch.zeros(1, self.actor[0].in_features)
input_size = getattr(self.actor, "input_dim", None)
if input_size is None:
input_size = self.actor[0].in_features
obs = torch.zeros(1, input_size)
torch.onnx.export(
self,
obs,
Expand Down
18 changes: 18 additions & 0 deletions source/isaaclab_rl/isaaclab_rl/rsl_rl/rl_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,24 @@ class RslRlPpoActorCriticCfg:
"""The activation function for the actor and critic networks."""


@configclass
class RslRlPpoActorCriticConv2dCfg(RslRlPpoActorCriticCfg):
    """Configuration for the PPO actor-critic networks with convolutional layers.

    Extends the base actor-critic configuration with the parameters of a convolutional stack and
    the size of the linear layer that follows it.
    """

    # overrides the policy class name with the convolutional variant
    class_name: str = "ActorCriticConv2d"
    """The policy class name. Default is ActorCriticConv2d."""

    # default: three layers with 4, 8 and 16 output channels, each with kernel size 3 and stride 2
    # NOTE(review): this is a mutable list default; presumably ``configclass`` gives every
    # instance its own copy -- confirm.
    conv_layers_params: list[dict] = [
        {"out_channels": 4, "kernel_size": 3, "stride": 2},
        {"out_channels": 8, "kernel_size": 3, "stride": 2},
        {"out_channels": 16, "kernel_size": 3, "stride": 2},
    ]
    """List of convolutional layer parameters for the convolutional network."""

    conv_linear_output_size: int = 16
    """Output size of the linear layer after the convolutional features are flattened."""


@configclass
class RslRlPpoAlgorithmCfg:
"""Configuration for the PPO algorithm."""
Expand Down
Loading