isaac-sim · KingsleyLiu-NV · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025 · Mar 6, 2025
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
@@ -56,6 +56,7 @@ Guidelines for modifications:
 * HoJin Jeon
 * Hongwei Xiong
 * Hongyu Li
+* Hui Kang
 * Iretiayo Akinola
 * Jack Zeng
 * Jan Kerner
@@ -65,6 +66,7 @@ Guidelines for modifications:
 * Jingzhou Liu
 * Johnson Sun
 * Kaixi Bao
+* Kingsley Liu
 * Kourosh Darvish
 * Lionel Gulich
 * Louis Le Lay

@@ -289,15 +289,19 @@ Navigation
 .. table::
     :widths: 33 37 30
 
-    +----------------+---------------------+-----------------------------------------------------------------------------+
-    | World          | Environment ID      | Description                                                                 |
-    +================+=====================+=============================================================================+
-    | |anymal_c_nav| | |anymal_c_nav-link| | Navigate towards a target x-y position and heading with the ANYmal C robot. |
-    +----------------+---------------------+-----------------------------------------------------------------------------+
+    +-------------------------+------------------------------+---------------------------------------------------------------------------------------------------------+
+    | World                   | Environment ID               | Description                                                                                             |
+    +=========================+==============================+=========================================================================================================+
+    | |anymal_c_nav|          | |anymal_c_nav-link|          | Navigate towards a target x-y position and heading with the ANYmal C robot.                             |
+    +-------------------------+------------------------------+---------------------------------------------------------------------------------------------------------+
+    | |anymal_c_nav_obstacle| | |anymal_c_nav_obstacle-link| | Navigate towards a target x-y position and heading with the ANYmal C robot in a scene with an obstacle. |
+    +-------------------------+------------------------------+---------------------------------------------------------------------------------------------------------+
 
 .. |anymal_c_nav-link| replace:: `Isaac-Navigation-Flat-Anymal-C-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/navigation_env_cfg.py>`__
+.. |anymal_c_nav_obstacle-link| replace:: `Isaac-Navigation-Flat-Obstacle-Anymal-C-v0 <https://github.com/isaac-sim/IsaacLab/blob/main/source/isaaclab_tasks/isaaclab_tasks/manager_based/navigation/config/anymal_c/navigation_obstacle_env_cfg.py>`__
 
 .. |anymal_c_nav| image:: ../_static/tasks/navigation/anymal_c_nav.jpg
+.. |anymal_c_nav_obstacle| image:: ../_static/tasks/navigation/anymal_c_nav_obstacle.jpg
 
 
 Others
@@ -492,6 +496,10 @@ Comprehensive List of Environments
       - Isaac-Navigation-Flat-Anymal-C-Play-v0
       - Manager Based
       - **rsl_rl** (PPO), **skrl** (PPO)
+    * - Isaac-Navigation-Flat-Obstacle-Anymal-C-v0
+      - Isaac-Navigation-Flat-Obstacle-Anymal-C-Play-v0
+      - Manager Based
+      - **rsl_rl** (PPO)
     * - Isaac-Open-Drawer-Franka-IK-Abs-v0
       -
       - Manager Based

@@ -49,7 +49,7 @@
 import time
 import torch
 
-from rsl_rl.runners import OnPolicyRunner
+from rsl_rl.runners import OnPolicyRunner, OnPolicyRunnerConv2d
 
 from isaaclab.envs import DirectMARLEnv, multi_agent_to_single_agent
 from isaaclab.utils.assets import retrieve_file_path
@@ -110,7 +110,10 @@ def main():
 
     print(f"[INFO]: Loading model checkpoint from: {resume_path}")
     # load previously trained model
-    ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+    if agent_cfg.policy.class_name == "ActorCriticConv2d":
+        ppo_runner = OnPolicyRunnerConv2d(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
+    else:
+        ppo_runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=None, device=agent_cfg.device)
     ppo_runner.load(resume_path)
 
     # obtain the trained policy for inference
@@ -128,7 +131,10 @@ def main():
     dt = env.unwrapped.physics_dt
 
     # reset environment
-    obs, _ = env.get_observations()
+    obs, extras = env.get_observations()
+    if "sensor" in extras["observations"]:
+        image_obs = extras["observations"]["sensor"].permute(0, 3, 1, 2).flatten(start_dim=1)
+        obs = torch.cat([obs, image_obs], dim=1)
     timestep = 0
     # simulate environment
     while simulation_app.is_running():
@@ -138,7 +144,10 @@ def main():
             # agent stepping
             actions = policy(obs)
             # env stepping
-            obs, _, _, _ = env.step(actions)
+            obs, _, _, infos = env.step(actions)
+            if "sensor" in infos["observations"]:
+                image_obs = infos["observations"]["sensor"].permute(0, 3, 1, 2).flatten(start_dim=1)
+                obs = torch.cat([obs, image_obs], dim=1)
         if args_cli.video:
             timestep += 1
             # Exit the play loop after recording one video

@@ -49,7 +49,7 @@
 import torch
 from datetime import datetime
 
-from rsl_rl.runners import OnPolicyRunner
+from rsl_rl.runners import OnPolicyRunner, OnPolicyRunnerConv2d
 
 from isaaclab.envs import (
     DirectMARLEnv,
@@ -127,7 +127,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
     env = RslRlVecEnvWrapper(env)
 
     # create runner from rsl-rl
-    runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
+    if agent_cfg.policy.class_name == "ActorCriticConv2d":
+        runner = OnPolicyRunnerConv2d(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
+    else:
+        runner = OnPolicyRunner(env, agent_cfg.to_dict(), log_dir=log_dir, device=agent_cfg.device)
     # write git state to logs
     runner.add_git_repo_to_log(__file__)
     # load the checkpoint

diff --git a/source/isaaclab/config/extension.toml b/source/isaaclab/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.34.9"
+version = "0.34.10"
 
 # Description
 title = "Isaac Lab framework for Robot Learning"

diff --git a/source/isaaclab/docs/CHANGELOG.rst b/source/isaaclab/docs/CHANGELOG.rst
@@ -1,6 +1,16 @@
 Changelog
 ---------
 
+0.34.10 (2025-03-06)
+~~~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added the ``ObstaclePose2dCommand`` pose-2D command term that generates the target position based on the position of the obstacle object.
+* Added the ``reset_root_state_uniform_angular`` reset event that samples a position in a sector of a ring area defined by the radius and angle ranges.
+
+
 0.34.9 (2025-03-04)
 ~~~~~~~~~~~~~~~~~~~
 

@@ -8,12 +8,13 @@
 from .commands_cfg import (
     NormalVelocityCommandCfg,
     NullCommandCfg,
+    ObstaclePose2dCommandCfg,
     TerrainBasedPose2dCommandCfg,
     UniformPose2dCommandCfg,
     UniformPoseCommandCfg,
     UniformVelocityCommandCfg,
 )
 from .null_command import NullCommand
-from .pose_2d_command import TerrainBasedPose2dCommand, UniformPose2dCommand
+from .pose_2d_command import ObstaclePose2dCommand, TerrainBasedPose2dCommand, UniformPose2dCommand
 from .pose_command import UniformPoseCommand
 from .velocity_command import NormalVelocityCommand, UniformVelocityCommand
@@ -12,7 +12,7 @@
 from isaaclab.utils import configclass
 
 from .null_command import NullCommand
-from .pose_2d_command import TerrainBasedPose2dCommand, UniformPose2dCommand
+from .pose_2d_command import ObstaclePose2dCommand, TerrainBasedPose2dCommand, UniformPose2dCommand
 from .pose_command import UniformPoseCommand
 from .velocity_command import NormalVelocityCommand, UniformVelocityCommand
 
@@ -228,6 +228,29 @@ class Ranges:
     goal_pose_visualizer_cfg.markers["arrow"].scale = (0.2, 0.2, 0.8)
 
 
+@configclass
+class ObstaclePose2dCommandCfg(UniformPose2dCommandCfg):
+    """Configuration for the obstacle-based 2D-pose command generator (:class:`ObstaclePose2dCommand`)."""
+
+    class_type: type = ObstaclePose2dCommand
+
+    object_name: str = MISSING  # the command term looks this up as ``env.scene[object_name]``
+    """Name of the obstacle object in the environment."""
+
+    @configclass
+    class Ranges:
+        """Uniform distribution ranges for the heading command (the only range declared here)."""
+
+        heading: tuple[float, float] = MISSING
+        """Heading range for the position commands (in rad).
+
+        Used only if :attr:`simple_heading` is False.
+        """
+
+    ranges: Ranges = MISSING
+    """Distribution ranges for the sampled commands."""
+
+
 @configclass
 class TerrainBasedPose2dCommandCfg(UniformPose2dCommandCfg):
     """Configuration for the terrain-based position command generator."""

@@ -11,7 +11,7 @@
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from isaaclab.assets import Articulation
+from isaaclab.assets import Articulation, RigidObject
 from isaaclab.managers import CommandTerm
 from isaaclab.markers import VisualizationMarkers
 from isaaclab.terrains import TerrainImporter
@@ -20,7 +20,7 @@
 if TYPE_CHECKING:
     from isaaclab.envs import ManagerBasedEnv
 
-    from .commands_cfg import TerrainBasedPose2dCommandCfg, UniformPose2dCommandCfg
+    from .commands_cfg import ObstaclePose2dCommandCfg, TerrainBasedPose2dCommandCfg, UniformPose2dCommandCfg
 
 
 class UniformPose2dCommand(CommandTerm):
@@ -143,6 +143,66 @@ def _debug_vis_callback(self, event):
         )
 
 
+class ObstaclePose2dCommand(UniformPose2dCommand):
+    """Command generator that generates pose commands based on the obstacle.
+
+    This command generator places the target x-y position at the robot's root position mirrored across
+    the obstacle's root position (``2 * obstacle - robot``). The heading commands are either aligned with
+    the robot-to-target direction or sampled uniformly. This can be configured through the
+    :attr:`ObstaclePose2dCommandCfg.simple_heading` parameter in the configuration.
+    """
+
+    cfg: ObstaclePose2dCommandCfg
+    """Configuration for the command generator."""
+
+    def __init__(self, cfg: ObstaclePose2dCommandCfg, env: ManagerBasedEnv):
+        """Initialize the command generator class.
+
+        Args:
+            cfg: The configuration parameters for the command generator.
+            env: The environment object.
+        """
+        # initialize the base class
+        super().__init__(cfg, env)
+
+        # obtain the obstacle object
+        self.object: RigidObject = env.scene[cfg.object_name]
+
+    def _resample_command(self, env_ids: Sequence[int]):
+        # initialize with the env origins (the entries at indices 0-2 are overwritten below)
+        self.pos_command_w[env_ids] = self._env.scene.env_origins[env_ids]
+        r = torch.empty(len(env_ids), device=self.device)  # buffer used only for sampling random headings
+        # mirror the robot's root x-y position across the obstacle: target = 2 * obstacle - robot
+        self.pos_command_w[env_ids, 0] = (
+            2 * self.object.data.root_pos_w[env_ids, 0] - self.robot.data.root_pos_w[env_ids, 0]
+        )
+        self.pos_command_w[env_ids, 1] = (
+            2 * self.object.data.root_pos_w[env_ids, 1] - self.robot.data.root_pos_w[env_ids, 1]
+        )
+        self.pos_command_w[env_ids, 2] = self.robot.data.default_root_state[env_ids, 2]  # NOTE(review): presumably the default root height; confirm
+
+        if self.cfg.simple_heading:
+            # compute the direction from the robot to the target and its opposite
+            target_vec = self.pos_command_w[env_ids] - self.robot.data.root_pos_w[env_ids]
+            target_direction = torch.atan2(target_vec[:, 1], target_vec[:, 0])
+            flipped_target_direction = wrap_to_pi(target_direction + torch.pi)
+
+            # compute errors to find the closest direction to the current heading
+            # this is done to avoid the discontinuity at the -pi/pi boundary
+            curr_to_target = wrap_to_pi(target_direction - self.robot.data.heading_w[env_ids]).abs()
+            curr_to_flipped_target = wrap_to_pi(flipped_target_direction - self.robot.data.heading_w[env_ids]).abs()
+
+            # set the heading command to the closest direction
+            self.heading_command_w[env_ids] = torch.where(
+                curr_to_target < curr_to_flipped_target,
+                target_direction,
+                flipped_target_direction,
+            )
+        else:
+            # random heading command, sampled uniformly from the configured heading range
+            self.heading_command_w[env_ids] = r.uniform_(*self.cfg.ranges.heading)
+
+
 class TerrainBasedPose2dCommand(UniformPose2dCommand):
     """Command generator that generates pose commands based on the terrain.
 

@@ -737,6 +737,45 @@ def reset_root_state_uniform(
     asset.write_root_velocity_to_sim(velocities, env_ids=env_ids)
 
 
+def reset_root_state_uniform_angular(
+    env: ManagerBasedEnv,
+    env_ids: torch.Tensor,
+    radius_range: tuple[float, float],
+    angle_range: tuple[float, float],
+    asset_cfg: SceneEntityCfg = SceneEntityCfg("object"),
+):
+    """Reset the asset root state to a random position within a sector of a ring area defined by the radius and angle ranges.
+
+    This function randomizes the x-y root position of the asset.
+
+    * It samples a radius and an angle uniformly, converts them to an x-y offset, and adds the offset to the default
+      root x-y position and the environment origin, before setting the pose into the physics simulation.
+    * It takes the remaining pose entries (columns 2:7) from the default root state.
+    * It does not write the root velocity.
+
+    The function takes tuples of the form ``(min, max)`` for the radius and angle ranges (angle in radians).
+    """
+    # extract the used quantities (to enable type-hinting)
+    asset: RigidObject | Articulation = env.scene[asset_cfg.name]
+    # get default root state
+    root_states = asset.data.default_root_state[env_ids].clone()
+
+    # sample one (radius, angle) pair per environment: column 0 is the radius, column 1 the angle
+    range_list = [radius_range, angle_range]
+    ranges = torch.tensor(range_list, device=asset.device)
+    rand_samples = math_utils.sample_uniform(ranges[:, 0], ranges[:, 1], (len(env_ids), 2), device=asset.device)
+
+    rand_x = rand_samples[:, 0:1] * torch.cos(rand_samples[:, 1:2])  # polar -> Cartesian (x)
+    rand_y = rand_samples[:, 0:1] * torch.sin(rand_samples[:, 1:2])  # polar -> Cartesian (y)
+    rand_pos = torch.cat([rand_x, rand_y], dim=-1)
+
+    positions = root_states[:, 0:2] + env.scene.env_origins[env_ids, 0:2] + rand_pos
+    orientations = root_states[:, 2:7]  # NOTE(review): five columns, presumably z followed by the quaternion; confirm
+
+    # set into the physics simulation
+    asset.write_root_pose_to_sim(torch.cat([positions, orientations], dim=-1), env_ids=env_ids)
+
+
 def reset_root_state_with_random_orientation(
     env: ManagerBasedEnv,
     env_ids: torch.Tensor,

diff --git a/source/isaaclab_rl/config/extension.toml b/source/isaaclab_rl/config/extension.toml
@@ -1,7 +1,7 @@
 [package]
 
 # Note: Semantic Versioning is used: https://semver.org/
-version = "0.1.0"
+version = "0.1.1"
 
 # Description
 title = "Isaac Lab RL"

diff --git a/source/isaaclab_rl/docs/CHANGELOG.rst b/source/isaaclab_rl/docs/CHANGELOG.rst
@@ -1,6 +1,15 @@
 Changelog
 ---------
 
+0.1.1 (2025-03-06)
+~~~~~~~~~~~~~~~~~~~~
+
+Added
+^^^^^
+
+* Added the ``RslRlPpoActorCriticConv2dCfg`` configuration for RSL RL actor-critic networks with convolutional layers.
+
+
 0.1.0 (2024-12-27)
 ~~~~~~~~~~~~~~~~~~
 

@@ -16,5 +16,5 @@
 """
 
 from .exporter import export_policy_as_jit, export_policy_as_onnx
-from .rl_cfg import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoAlgorithmCfg
+from .rl_cfg import RslRlOnPolicyRunnerCfg, RslRlPpoActorCriticCfg, RslRlPpoActorCriticConv2dCfg, RslRlPpoAlgorithmCfg
 from .vecenv_wrapper import RslRlVecEnvWrapper
@@ -137,7 +137,10 @@ def export(self, path, filename):
                 dynamic_axes={},
             )
         else:
-            obs = torch.zeros(1, self.actor[0].in_features)
+            input_size = getattr(self.actor, "input_dim", None)
+            if input_size is None:
+                input_size = self.actor[0].in_features
+            obs = torch.zeros(1, input_size)
             torch.onnx.export(
                 self,
                 obs,

@@ -29,6 +29,24 @@ class RslRlPpoActorCriticCfg:
     """The activation function for the actor and critic networks."""
 
 
+@configclass
+class RslRlPpoActorCriticConv2dCfg(RslRlPpoActorCriticCfg):
+    """Configuration for the PPO actor-critic networks with convolutional layers (``ActorCriticConv2d`` policy)."""
+
+    class_name: str = "ActorCriticConv2d"
+    """The policy class name. Default is ActorCriticConv2d."""
+
+    conv_layers_params: list[dict] = [
+        {"out_channels": 4, "kernel_size": 3, "stride": 2},
+        {"out_channels": 8, "kernel_size": 3, "stride": 2},
+        {"out_channels": 16, "kernel_size": 3, "stride": 2},
+    ]
+    """List of convolutional layer parameters for the convolutional network."""
+
+    conv_linear_output_size: int = 16
+    """Output size of the linear layer after the convolutional features are flattened."""
+
+
 @configclass
 class RslRlPpoAlgorithmCfg:
     """Configuration for the PPO algorithm."""