diff --git a/source/isaaclab/isaaclab/utils/warp/utils.py b/source/isaaclab/isaaclab/utils/warp/utils.py new file mode 100644 index 000000000000..2df288dfba2c --- /dev/null +++ b/source/isaaclab/isaaclab/utils/warp/utils.py @@ -0,0 +1,151 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import functools +from collections.abc import Sequence + +import torch +import warp as wp + +## +# Mask resolution - ids/mask to warp boolean mask. +## + + +@wp.kernel +def _populate_mask_from_ids( + mask: wp.array(dtype=wp.bool), + ids: wp.array(dtype=wp.int32), +): + i = wp.tid() + mask[ids[i]] = True + + +def resolve_1d_mask( + *, + ids: Sequence[int] | slice | torch.Tensor | wp.array | None = None, + mask: wp.array | torch.Tensor | None = None, + all_mask: wp.array, + scratch_mask: wp.array, + device: str, +) -> wp.array: + """Resolve ids/mask into a warp boolean mask. + + Callers provide pre-allocated ``all_mask`` (all-True) and ``scratch_mask`` (reusable + work buffer) so this function never allocates. + + Args: + ids: Index ids. Accepts ``Sequence[int]``, ``slice``, ``torch.Tensor``, + ``wp.array(dtype=wp.int32)``, or ``None`` (all elements). + mask: Direct boolean mask. ``wp.array`` is returned as-is; + ``torch.Tensor`` is converted. + all_mask: Pre-allocated all-True mask returned when both *ids* and *mask* + are ``None``. + scratch_mask: Pre-allocated scratch buffer populated in-place when *ids* + are provided. Not re-entrant (shared buffer). + device: Warp device string (e.g. ``"cuda:0"``). + + Returns: + A ``wp.array(dtype=wp.bool)`` mask. + """ + # Normalize slice(None) to None so the capture guard treats it identically to ids=None. + if isinstance(ids, slice) and ids == slice(None): + ids = None + + if wp.get_device().is_capturing: + if ids is not None or (mask is not None and not isinstance(mask, wp.array)): + raise RuntimeError( + "resolve_1d_mask is only capturable when mask is a wp.array or both ids and mask are None." + ) + + # --- Direct mask input --- + if mask is not None: + if isinstance(mask, wp.array): + return mask + if isinstance(mask, torch.Tensor): + if mask.dtype != torch.bool: + mask = mask.to(dtype=torch.bool) + if str(mask.device) != device: + mask = mask.to(device) + return wp.from_torch(mask, dtype=wp.bool) + raise TypeError(f"Unsupported mask type: {type(mask)}") + + # --- Fast path: all elements --- + if ids is None: + return all_mask + + # --- Normalize slice to list --- + if isinstance(ids, slice): + start, stop, step = ids.indices(scratch_mask.shape[0]) + ids = list(range(start, stop, step)) + + # --- Normalize to concrete type --- + if not isinstance(ids, (torch.Tensor, wp.array)): + ids = list(ids) + + # --- Populate scratch mask --- + scratch_mask.fill_(False) + + if isinstance(ids, torch.Tensor): + if ids.numel() == 0: + return scratch_mask + if str(ids.device) != device: + ids = ids.to(device) + if ids.dtype != torch.int32: + ids = ids.to(dtype=torch.int32) + if not ids.is_contiguous(): + ids = ids.contiguous() + ids_wp = wp.from_torch(ids, dtype=wp.int32) + elif isinstance(ids, wp.array): + if ids.shape[0] == 0: + return scratch_mask + if ids.dtype != wp.int32: + raise TypeError(f"Unsupported wp.array dtype for ids: {ids.dtype}. Expected wp.int32 index array.") + ids_wp = ids + else: + if len(ids) == 0: + return scratch_mask + ids_wp = wp.array(ids, dtype=wp.int32, device=device) + + wp.launch(_populate_mask_from_ids, dim=ids_wp.shape[0], inputs=[scratch_mask, ids_wp], device=device) + return scratch_mask + + +## +# Capture safety — property guard. +## + + +def capture_unsafe(reason: str | None = None): + """Mark a callable as not CUDA-graph-capture-safe. + + Raises ``RuntimeError`` if the decorated callable is invoked while + ``wp.get_device().is_capturing`` is ``True``. + + Args: + reason: Optional explanation appended to the error message. + + Usage:: + + @property + @capture_unsafe("Relies on a Python timestamp guard.") + def projected_gravity_b(self) -> wp.array: ... + """ + + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if wp.get_device().is_capturing: + msg = f"'{func.__qualname__}' cannot be called during CUDA graph capture." + if reason: + msg = f"{msg} {reason}" + raise RuntimeError(msg) + return func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/manager_based_env_warp.py b/source/isaaclab_experimental/isaaclab_experimental/envs/manager_based_env_warp.py index 7f747205e964..8c558b925947 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/envs/manager_based_env_warp.py +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/manager_based_env_warp.py @@ -76,7 +76,10 @@ def __init__(self, cfg: ManagerBasedEnvCfg): self.cfg = cfg # initialize internal variables self._is_closed = False - self._manager_call_switch = ManagerCallSwitch() + # temporary debug runtime config for manager source/call switching. + cfg_source: dict | str | None = getattr(self.cfg, "manager_call_config", None) + max_modes: dict[str, int] | None = getattr(self.cfg, "manager_call_max_mode", None) + self._manager_call_switch = ManagerCallSwitch(cfg_source, max_modes=max_modes) self._apply_manager_term_cfg_profile() # set the seed for the environment @@ -265,6 +268,17 @@ def device(self): """The device on which the environment is running.""" return self.sim.device + @property + def env_origins_wp(self) -> wp.array: + """Scene env origins as a warp ``vec3f`` array. Cached on first access.""" + if not hasattr(self, "_env_origins_wp"): + origins = self.scene.env_origins + if isinstance(origins, wp.array): + self._env_origins_wp = origins + else: + self._env_origins_wp = wp.from_torch(origins, dtype=wp.vec3f) + return self._env_origins_wp + def resolve_env_mask( self, *, diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/manager_based_rl_env_warp.py b/source/isaaclab_experimental/isaaclab_experimental/envs/manager_based_rl_env_warp.py index 221fba8cb9c2..2e1894e54003 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/envs/manager_based_rl_env_warp.py +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/manager_based_rl_env_warp.py @@ -25,6 +25,7 @@ from isaaclab.envs.common import VecEnvStepReturn from isaaclab.envs.manager_based_rl_env_cfg import ManagerBasedRLEnvCfg +from isaaclab.managers import CommandManager from isaaclab.ui.widgets import ManagerLiveVisualizer from isaaclab.utils.timer import Timer @@ -33,14 +34,14 @@ from .manager_based_env_warp import ManagerBasedEnvWarp -DEBUG_TIMER_STEP = os.environ.get("DEBUG_TIMER_STEP", "0") == "1" -"""Enable outer step() timer. Set DEBUG_TIMER_STEP=1 env var to enable.""" - DEBUG_TIMERS = os.environ.get("DEBUG_TIMERS", "0") == "1" -"""Enable all fine-grained inner timers. Set DEBUG_TIMERS=1 env var to enable.""" +"""Enable outer step() timer. Set DEBUG_TIMERS=1 env var to enable.""" + +DEBUG_TIMER_STEP = os.environ.get("DEBUG_TIMER_STEP", "0") == "1" +"""Enable step sub-phase timers. Set DEBUG_TIMER_STEP=1 env var to enable.""" -TIMER_ENABLED_STEP = DEBUG_TIMER_STEP or DEBUG_TIMERS -TIMER_ENABLED_RESET_IDX = DEBUG_TIMERS +DEBUG_TIMER_RESET = os.environ.get("DEBUG_TIMER_RESET", "0") == "1" +"""Enable reset sub-phase timers. Set DEBUG_TIMER_RESET=1 env var to enable.""" class ManagerBasedRLEnvWarp(ManagerBasedEnvWarp, gym.Env): @@ -155,8 +156,6 @@ def load_managers(self): # note: this order is important since observation manager needs to know the command and action managers # and the reward manager needs to know the termination manager # -- command manager (stable impl — not routed through ManagerCallSwitch) - from isaaclab.managers import CommandManager - self.command_manager = CommandManager(self.cfg.commands, self) print("[INFO] Command Manager: ", self.command_manager) @@ -214,7 +213,7 @@ def step_warp_termination_compute(self) -> None: self.reset_terminated = self.termination_manager.terminated self.reset_time_outs = self.termination_manager.time_outs - @Timer(name="env_step", msg="Step took:", enable=TIMER_ENABLED_STEP, time_unit="us") + @Timer(name="env_step", msg="Step took:", enable=DEBUG_TIMER_STEP, time_unit="us") def step(self, action: torch.Tensor) -> VecEnvStepReturn: """Execute one time-step of the environment's dynamics and reset terminated environments. @@ -238,9 +237,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: # NOTE: keep a persistent action input buffer for graph pointer stability. # IMPORTANT: Do NOT re-wrap/replace the `wp.array` used by captured graphs each step. # Instead, copy the latest actions into the persistent buffer. - with Timer( - name="action_preprocess", msg="Action preprocessing took:", enable=TIMER_ENABLED_STEP, time_unit="us" - ): + with Timer(name="action_preprocess", msg="Action preprocessing took:", enable=DEBUG_TIMER_STEP, time_unit="us"): if self._action_in_wp is None: raise RuntimeError("Action buffer not initialized. Call reset() before step().") action_device = action.to(self.device) @@ -249,7 +246,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: self._manager_call_switch.call_stage( stage="ActionManager_process_action", warp_call={"fn": self.action_manager.process_action, "kwargs": {"action": self._action_in_wp}}, - timer=TIMER_ENABLED_STEP, + timer=DEBUG_TIMER_STEP, ) self.recorder_manager.record_pre_step() @@ -265,16 +262,16 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: self._manager_call_switch.call_stage( stage="ActionManager_apply_action", warp_call={"fn": self.action_manager.apply_action}, - timer=TIMER_ENABLED_STEP, + timer=DEBUG_TIMER_STEP, ) self._manager_call_switch.call_stage( stage="Scene_write_data_to_sim", warp_call={"fn": self.scene.write_data_to_sim}, - timer=TIMER_ENABLED_STEP, + timer=DEBUG_TIMER_STEP, ) # simulate - with Timer(name="simulate", msg="Newton simulation step took:", enable=TIMER_ENABLED_STEP, time_unit="us"): + with Timer(name="simulate", msg="Newton simulation step took:", enable=DEBUG_TIMER_STEP, time_unit="us"): self.sim.step(render=False) self.recorder_manager.record_post_physics_decimation_step() # render between steps only if the GUI or an RTX sensor needs it @@ -286,7 +283,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: with Timer( name="scene.update", msg="Scene.update took:", - enable=TIMER_ENABLED_STEP, + enable=DEBUG_TIMER_STEP, time_unit="us", ): self.scene.update(dt=self.physics_dt) @@ -300,12 +297,12 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: self._manager_call_switch.call_stage( stage="TerminationManager_compute", warp_call={"fn": self.step_warp_termination_compute}, - timer=TIMER_ENABLED_STEP, + timer=DEBUG_TIMER_STEP, ) self.reward_buf = self._manager_call_switch.call_stage( stage="RewardManager_compute", warp_call={"fn": self.reward_manager.compute, "kwargs": {"dt": float(self.step_dt)}}, - timer=TIMER_ENABLED_STEP, + timer=DEBUG_TIMER_STEP, ) if len(self.recorder_manager.active_terms) > 0: @@ -313,7 +310,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: self._manager_call_switch.call_stage( stage="ObservationManager_compute_no_history", warp_call={"fn": self.observation_manager.compute, "kwargs": {"return_cloned_output": False}}, - timer=TIMER_ENABLED_STEP, + timer=DEBUG_TIMER_STEP, ) self.recorder_manager.record_post_step() @@ -324,7 +321,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: with Timer( name="reset_selection", msg="Reset selection took:", - enable=TIMER_ENABLED_STEP, + enable=DEBUG_TIMER_STEP, time_unit="us", ): # Keep the reset-mask handoff fully in Warp when experimental termination buffers exist. @@ -343,7 +340,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: with Timer( name="reset_idx", msg="Reset idx took:", - enable=TIMER_ENABLED_STEP, + enable=DEBUG_TIMER_STEP, time_unit="us", ): self._reset_idx(env_ids=reset_env_ids, env_mask=self.reset_mask_wp) @@ -364,7 +361,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: self._manager_call_switch.call_stage( stage="EventManager_apply_interval", warp_call={"fn": self.event_manager.apply, "kwargs": {"mode": "interval", "dt": float(self.step_dt)}}, - timer=TIMER_ENABLED_STEP, + timer=DEBUG_TIMER_STEP, ) # -- compute observations @@ -376,7 +373,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn: "kwargs": {"update_history": True, "return_cloned_output": False}, "output": lambda r: clone_obs_buffer(r), }, - timer=TIMER_ENABLED_STEP, + timer=DEBUG_TIMER_STEP, ) # return observations, rewards, resets and extras return self.obs_buf, self.reward_buf, self.reset_terminated, self.reset_time_outs, self.extras @@ -526,7 +523,7 @@ def _reset_idx( with Timer( name="curriculum_manager.compute_reset", msg="CurriculumManager.compute (reset) took:", - enable=TIMER_ENABLED_RESET_IDX, + enable=DEBUG_TIMER_RESET, time_unit="us", ): self.curriculum_manager.compute(env_ids=env_ids) @@ -534,8 +531,8 @@ def _reset_idx( # reset the internal buffers of the scene elements self._manager_call_switch.call_stage( stage="Scene_reset", - warp_call={"fn": self.scene.reset, "kwargs": {"env_mask": env_mask}}, - timer=TIMER_ENABLED_RESET_IDX, + warp_call={"fn": self.scene.reset, "kwargs": {"env_ids": env_ids, "env_mask": env_mask}}, + timer=DEBUG_TIMER_RESET, ) if "reset" in self.event_manager.available_modes: @@ -550,7 +547,7 @@ def _reset_idx( "global_env_step_count": self._global_env_step_count_wp, }, }, - timer=TIMER_ENABLED_RESET_IDX, + timer=DEBUG_TIMER_RESET, ) # iterate over all managers and reset them @@ -560,24 +557,24 @@ def _reset_idx( obs_info = self._manager_call_switch.call_stage( stage="ObservationManager_reset", warp_call={"fn": self.observation_manager.reset, "kwargs": {"env_mask": env_mask}}, - timer=TIMER_ENABLED_RESET_IDX, + timer=DEBUG_TIMER_RESET, ) action_info = self._manager_call_switch.call_stage( stage="ActionManager_reset", warp_call={"fn": self.action_manager.reset, "kwargs": {"env_mask": env_mask}}, - timer=TIMER_ENABLED_RESET_IDX, + timer=DEBUG_TIMER_RESET, ) reward_info = self._manager_call_switch.call_stage( stage="RewardManager_reset", warp_call={"fn": self.reward_manager.reset, "kwargs": {"env_mask": env_mask}}, - timer=TIMER_ENABLED_RESET_IDX, + timer=DEBUG_TIMER_RESET, ) # -- curriculum manager with Timer( name="curriculum_manager.reset", msg="CurriculumManager.reset took:", - enable=TIMER_ENABLED_RESET_IDX, + enable=DEBUG_TIMER_RESET, time_unit="us", ): curriculum_info = self.curriculum_manager.reset(env_ids=env_ids) @@ -587,12 +584,14 @@ def _reset_idx( event_info = self._manager_call_switch.call_stage( stage="EventManager_reset", warp_call={"fn": self.event_manager.reset, "kwargs": {"env_mask": env_mask}}, - timer=TIMER_ENABLED_RESET_IDX, + stable_call={"fn": self.event_manager.reset, "kwargs": {"env_ids": env_ids}}, + timer=DEBUG_TIMER_RESET, ) termination_info = self._manager_call_switch.call_stage( stage="TerminationManager_reset", warp_call={"fn": self.termination_manager.reset, "kwargs": {"env_mask": env_mask}}, - timer=TIMER_ENABLED_RESET_IDX, + stable_call={"fn": self.termination_manager.reset, "kwargs": {"env_ids": env_ids}}, + timer=DEBUG_TIMER_RESET, ) # -- recorder manager diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/__init__.py index 283805a279fe..d295384149d4 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/__init__.py +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/__init__.py @@ -3,10 +3,10 @@ # # SPDX-License-Identifier: BSD-3-Clause -"""Experimental action terms (minimal). +"""Experimental action terms (Warp-first). -Only the action configs/terms currently required by the experimental manager-based Cartpole task -are provided here. +Provides Warp-first action term implementations overriding the stable +:mod:`isaaclab.envs.mdp.actions` module. """ from .actions_cfg import * # noqa: F401, F403 diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/actions_cfg.py b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/actions_cfg.py index d8826602dbe3..39d5b29c6fdb 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/actions_cfg.py +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/actions_cfg.py @@ -3,12 +3,6 @@ # # SPDX-License-Identifier: BSD-3-Clause -"""Action term configuration (experimental, minimal). - -This module mirrors the stable :mod:`isaaclab.envs.mdp.actions.actions_cfg` but only keeps what -the experimental Cartpole task needs. -""" - from dataclasses import MISSING from isaaclab.utils import configclass @@ -17,26 +11,51 @@ from . import joint_actions +## +# Joint actions. +## + @configclass class JointActionCfg(ActionTermCfg): - """Configuration for the base joint action term.""" + """Configuration for the base joint action term. + + See :class:`JointAction` for more details. + """ joint_names: list[str] = MISSING """List of joint names or regex expressions that the action will be mapped to.""" - scale: float | dict[str, float] = 1.0 """Scale factor for the action (float or dict of regex expressions). Defaults to 1.0.""" - offset: float | dict[str, float] = 0.0 """Offset factor for the action (float or dict of regex expressions). Defaults to 0.0.""" - preserve_order: bool = False """Whether to preserve the order of the joint names in the action output. Defaults to False.""" +@configclass +class JointPositionActionCfg(JointActionCfg): + """Configuration for the joint position action term. + + See :class:`JointPositionAction` for more details. + """ + + class_type: type[ActionTerm] = joint_actions.JointPositionAction + + use_default_offset: bool = True + """Whether to use default joint positions configured in the articulation asset as offset. + Defaults to True. + + If True, this flag results in overwriting the values of :attr:`offset` to the default joint positions + from the articulation asset. + """ + + @configclass class JointEffortActionCfg(JointActionCfg): - """Configuration for the joint effort action term.""" + """Configuration for the joint effort action term. + + See :class:`JointEffortAction` for more details. + """ class_type: type[ActionTerm] = joint_actions.JointEffortAction diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/joint_actions.py b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/joint_actions.py index 183cb2cfe49a..441bde86b4de 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/joint_actions.py +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/actions/joint_actions.py @@ -15,7 +15,7 @@ from isaaclab.assets.articulation import Articulation from isaaclab_experimental.managers.action_manager import ActionTerm -from isaaclab_experimental.utils.warp import resolve_1d_mask +from isaaclab_experimental.utils.warp import resolve_1d_mask, zero_masked_2d if TYPE_CHECKING: from isaaclab.envs import ManagerBasedEnv @@ -56,13 +56,6 @@ def _process_joint_actions_kernel( processed_out[env_id, j] = x -@wp.kernel -def _zero_masked_2d(mask: wp.array(dtype=wp.bool), values: wp.array(dtype=wp.float32, ndim=2)): - env_id, j = wp.tid() - if mask[env_id]: - values[env_id, j] = 0.0 - - class JointAction(ActionTerm): r"""Base class for joint actions. @@ -259,13 +252,37 @@ def reset(self, env_mask: wp.array | None = None) -> None: self._raw_actions.fill_(0.0) return wp.launch( - kernel=_zero_masked_2d, + kernel=zero_masked_2d, dim=(self.num_envs, self.action_dim), inputs=[env_mask, self._raw_actions], device=self.device, ) +class JointPositionAction(JointAction): + """Joint action term that applies the processed actions to the articulation's joints as position commands. + + Warp-first override of :class:`isaaclab.envs.mdp.actions.JointPositionAction`. + """ + + cfg: actions_cfg.JointPositionActionCfg + """The configuration of the action term.""" + + def __init__(self, cfg: actions_cfg.JointPositionActionCfg, env: ManagerBasedEnv): + super().__init__(cfg, env) + # use default joint positions as offset + if cfg.use_default_offset: + defaults_np = self._asset.data.default_joint_pos.numpy() + if isinstance(self._joint_ids, slice): + offset_vals = defaults_np[0, :].tolist() + else: + offset_vals = [float(defaults_np[0, jid]) for jid in self._joint_ids] + self._offset = wp.array(offset_vals, dtype=wp.float32, device=self.device) + + def apply_actions(self): + self._asset.set_joint_position_target_index(target=self.processed_actions, joint_ids=self._joint_ids_wp) + + class JointEffortAction(JointAction): """Joint action term that applies the processed actions to the articulation's joints as effort commands.""" diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/events.py b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/events.py index 053da8db7225..1e9dad36ddaa 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/events.py +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/events.py @@ -12,7 +12,7 @@ - Stable event terms (e.g. `isaaclab.envs.mdp.events.reset_joints_by_offset`) often build torch tensors and then call into Newton articulation writers with partial indices (env_ids/joint_ids). - On the Newton backend, passing torch tensors triggers expensive torch->warp conversions that currently allocate - full `(num_envs, num_joints)` buffers (see `isaaclab.utils.warp.utils.make_complete_data_from_torch_dual_index`). + full `(num_envs, num_joints)` buffers. These Warp-first implementations avoid that by writing directly into the sim-bound Warp state buffers (`asset.data.joint_pos` / `asset.data.joint_vel`) for the selected envs/joints. @@ -29,6 +29,370 @@ from isaaclab.assets import Articulation from isaaclab_experimental.managers import SceneEntityCfg +from isaaclab_experimental.utils.warp import WarpCapturable + +# --------------------------------------------------------------------------- +# Randomize rigid body center of mass +# --------------------------------------------------------------------------- + + +@wp.kernel +def _randomize_com_kernel( + env_mask: wp.array(dtype=wp.bool), + rng_state: wp.array(dtype=wp.uint32), + body_com_pos_b: wp.array(dtype=wp.vec3f, ndim=2), + body_ids: wp.array(dtype=wp.int32), + com_lo: wp.vec3f, + com_hi: wp.vec3f, +): + """Add random offset to center of mass positions for selected bodies.""" + env_id = wp.tid() + if not env_mask[env_id]: + return + + state = rng_state[env_id] + for k in range(body_ids.shape[0]): + b = body_ids[k] + v = body_com_pos_b[env_id, b] + dx = wp.randf(state, com_lo[0], com_hi[0]) + dy = wp.randf(state, com_lo[1], com_hi[1]) + dz = wp.randf(state, com_lo[2], com_hi[2]) + body_com_pos_b[env_id, b] = wp.vec3f(v[0] + dx, v[1] + dy, v[2] + dz) + rng_state[env_id] = state + + +@WarpCapturable(False, reason="set_coms_mask calls SimulationManager.add_model_change") +def randomize_rigid_body_com( + env, + env_mask: wp.array, + com_range: dict[str, tuple[float, float]], + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +): + """Randomize the center of mass (CoM) of rigid bodies by adding random offsets. + + Warp-first override of :func:`isaaclab.envs.mdp.events.randomize_rigid_body_com`. + Writes directly into the sim-bound ``body_com_pos_b`` buffer, then notifies the solver + via :meth:`set_coms_mask` so it recomputes inertial properties. + """ + asset: Articulation = env.scene[asset_cfg.name] + + fn = randomize_rigid_body_com + if not getattr(fn, "_is_warmed_up", False) or fn._asset_name != asset_cfg.name: + fn._asset_name = asset_cfg.name + r = [com_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z"]] + fn._com_lo = wp.vec3f(r[0][0], r[1][0], r[2][0]) + fn._com_hi = wp.vec3f(r[0][1], r[1][1], r[2][1]) + fn._is_warmed_up = True + + wp.launch( + kernel=_randomize_com_kernel, + dim=env.num_envs, + inputs=[ + env_mask, + env.rng_state_wp, + asset.data.body_com_pos_b, + asset_cfg.body_ids_wp, + fn._com_lo, + fn._com_hi, + ], + device=env.device, + ) + + # Notify the solver that inertial properties changed (COM position affects inertia). + asset.set_coms_mask(coms=asset.data.body_com_pos_b, env_mask=env_mask) + + +# --------------------------------------------------------------------------- +# Apply external force and torque +# --------------------------------------------------------------------------- + + +@wp.kernel +def _apply_external_force_torque_kernel( + env_mask: wp.array(dtype=wp.bool), + rng_state: wp.array(dtype=wp.uint32), + force_out: wp.array(dtype=wp.vec3f, ndim=2), + torque_out: wp.array(dtype=wp.vec3f, ndim=2), + force_lo: float, + force_hi: float, + torque_lo: float, + torque_hi: float, +): + env_id = wp.tid() + if not env_mask[env_id]: + # zero out unmasked envs so they don't accumulate stale forces + for b in range(force_out.shape[1]): + force_out[env_id, b] = wp.vec3f(0.0, 0.0, 0.0) + torque_out[env_id, b] = wp.vec3f(0.0, 0.0, 0.0) + return + + state = rng_state[env_id] + for b in range(force_out.shape[1]): + force_out[env_id, b] = wp.vec3f( + wp.randf(state, force_lo, force_hi), + wp.randf(state, force_lo, force_hi), + wp.randf(state, force_lo, force_hi), + ) + torque_out[env_id, b] = wp.vec3f( + wp.randf(state, torque_lo, torque_hi), + wp.randf(state, torque_lo, torque_hi), + wp.randf(state, torque_lo, torque_hi), + ) + rng_state[env_id] = state + + +def apply_external_force_torque( + env, + env_mask: wp.array, + force_range: tuple[float, float], + torque_range: tuple[float, float], + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +): + """Randomize external forces and torques applied to the asset's bodies. + + Warp-first override of :func:`isaaclab.envs.mdp.events.apply_external_force_torque`. + """ + asset: Articulation = env.scene[asset_cfg.name] + + # First-call: allocate scratch and pre-convert constant arguments. + if not getattr(apply_external_force_torque, "_is_warmed_up", False): + apply_external_force_torque._scratch_forces = wp.zeros( + (env.num_envs, asset.num_bodies), dtype=wp.vec3f, device=env.device + ) + apply_external_force_torque._scratch_torques = wp.zeros( + (env.num_envs, asset.num_bodies), dtype=wp.vec3f, device=env.device + ) + apply_external_force_torque._is_warmed_up = True + + wp.launch( + kernel=_apply_external_force_torque_kernel, + dim=env.num_envs, + inputs=[ + env_mask, + env.rng_state_wp, + apply_external_force_torque._scratch_forces, + apply_external_force_torque._scratch_torques, + force_range[0], + force_range[1], + torque_range[0], + torque_range[1], + ], + device=env.device, + ) + + asset.permanent_wrench_composer.set_forces_and_torques_mask( + forces=apply_external_force_torque._scratch_forces, + torques=apply_external_force_torque._scratch_torques, + env_mask=env_mask, + ) + + +# --------------------------------------------------------------------------- +# Push by velocity +# --------------------------------------------------------------------------- + + +@wp.kernel +def _push_by_setting_velocity_kernel( + env_mask: wp.array(dtype=wp.bool), + rng_state: wp.array(dtype=wp.uint32), + root_vel_w: wp.array(dtype=wp.spatial_vectorf), + vel_out: wp.array(dtype=wp.spatial_vectorf), + lin_lo: wp.vec3f, + lin_hi: wp.vec3f, + ang_lo: wp.vec3f, + ang_hi: wp.vec3f, +): + env_id = wp.tid() + if not env_mask[env_id]: + return + + vel = root_vel_w[env_id] + state = rng_state[env_id] + + vel_out[env_id] = wp.spatial_vectorf( + vel[0] + wp.randf(state, lin_lo[0], lin_hi[0]), + vel[1] + wp.randf(state, lin_lo[1], lin_hi[1]), + vel[2] + wp.randf(state, lin_lo[2], lin_hi[2]), + vel[3] + wp.randf(state, ang_lo[0], ang_hi[0]), + vel[4] + wp.randf(state, ang_lo[1], ang_hi[1]), + vel[5] + wp.randf(state, ang_lo[2], ang_hi[2]), + ) + + rng_state[env_id] = state + + +def push_by_setting_velocity( + env, + env_mask: wp.array, + velocity_range: dict[str, tuple[float, float]], + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +): + """Push the asset by setting the root velocity to a random value within the given ranges. + + Warp-first override of :func:`isaaclab.envs.mdp.events.push_by_setting_velocity`. + """ + asset: Articulation = env.scene[asset_cfg.name] + + # First-call: allocate scratch and pre-parse constant range arguments. + if not getattr(push_by_setting_velocity, "_is_warmed_up", False): + push_by_setting_velocity._scratch_vel = wp.zeros((env.num_envs,), dtype=wp.spatial_vectorf, device=env.device) + r = [velocity_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"]] + push_by_setting_velocity._lin_lo = wp.vec3f(r[0][0], r[1][0], r[2][0]) + push_by_setting_velocity._lin_hi = wp.vec3f(r[0][1], r[1][1], r[2][1]) + push_by_setting_velocity._ang_lo = wp.vec3f(r[3][0], r[4][0], r[5][0]) + push_by_setting_velocity._ang_hi = wp.vec3f(r[3][1], r[4][1], r[5][1]) + push_by_setting_velocity._is_warmed_up = True + + wp.launch( + kernel=_push_by_setting_velocity_kernel, + dim=env.num_envs, + inputs=[ + env_mask, + env.rng_state_wp, + asset.data.root_vel_w, + push_by_setting_velocity._scratch_vel, + push_by_setting_velocity._lin_lo, + push_by_setting_velocity._lin_hi, + push_by_setting_velocity._ang_lo, + push_by_setting_velocity._ang_hi, + ], + device=env.device, + ) + + asset.write_root_velocity_to_sim_mask(root_velocity=push_by_setting_velocity._scratch_vel, env_mask=env_mask) + + +# --------------------------------------------------------------------------- +# Reset root state uniform +# --------------------------------------------------------------------------- + + +@wp.kernel +def _reset_root_state_uniform_kernel( + env_mask: wp.array(dtype=wp.bool), + rng_state: wp.array(dtype=wp.uint32), + default_root_pose: wp.array(dtype=wp.transformf), + default_root_vel: wp.array(dtype=wp.spatial_vectorf), + env_origins: wp.array(dtype=wp.vec3f), + pose_out: wp.array(dtype=wp.transformf), + vel_out: wp.array(dtype=wp.spatial_vectorf), + pos_lo: wp.vec3f, + pos_hi: wp.vec3f, + rot_lo: wp.vec3f, + rot_hi: wp.vec3f, + vel_lin_lo: wp.vec3f, + vel_lin_hi: wp.vec3f, + vel_ang_lo: wp.vec3f, + vel_ang_hi: wp.vec3f, +): + env_id = wp.tid() + if not env_mask[env_id]: + return + + state = rng_state[env_id] + + # --- Pose --- + default_pose = default_root_pose[env_id] + default_pos = wp.transform_get_translation(default_pose) + default_q = wp.transform_get_rotation(default_pose) + origin = env_origins[env_id] + + # position = default + env_origin + random offset + pos = wp.vec3f( + default_pos[0] + origin[0] + wp.randf(state, pos_lo[0], pos_hi[0]), + default_pos[1] + origin[1] + wp.randf(state, pos_lo[1], pos_hi[1]), + default_pos[2] + origin[2] + wp.randf(state, pos_lo[2], pos_hi[2]), + ) + + # orientation = default * delta(euler_xyz) + roll = wp.randf(state, rot_lo[0], rot_hi[0]) + pitch = wp.randf(state, rot_lo[1], rot_hi[1]) + yaw = wp.randf(state, rot_lo[2], rot_hi[2]) + qx = wp.quat_from_axis_angle(wp.vec3f(1.0, 0.0, 0.0), roll) + qy = wp.quat_from_axis_angle(wp.vec3f(0.0, 1.0, 0.0), pitch) + qz = wp.quat_from_axis_angle(wp.vec3f(0.0, 0.0, 1.0), yaw) + # ZYX extrinsic = XYZ intrinsic: delta = qz * qy * qx + delta_q = wp.mul(wp.mul(qz, qy), qx) + final_q = wp.mul(default_q, delta_q) + + pose_out[env_id] = wp.transformf(pos, final_q) + + # --- Velocity --- + default_vel = default_root_vel[env_id] + vel_out[env_id] = wp.spatial_vectorf( + default_vel[0] + wp.randf(state, vel_lin_lo[0], vel_lin_hi[0]), + default_vel[1] + wp.randf(state, vel_lin_lo[1], vel_lin_hi[1]), + default_vel[2] + wp.randf(state, vel_lin_lo[2], vel_lin_hi[2]), + default_vel[3] + wp.randf(state, vel_ang_lo[0], vel_ang_hi[0]), + default_vel[4] + wp.randf(state, vel_ang_lo[1], vel_ang_hi[1]), + default_vel[5] + wp.randf(state, vel_ang_lo[2], vel_ang_hi[2]), + ) + + rng_state[env_id] = state + + +def reset_root_state_uniform( + env, + env_mask: wp.array, + pose_range: dict[str, tuple[float, float]], + velocity_range: dict[str, tuple[float, float]], + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +): + """Reset the asset root state to a random position and velocity uniformly within the given ranges. + + Warp-first override of :func:`isaaclab.envs.mdp.events.reset_root_state_uniform`. + """ + asset: Articulation = env.scene[asset_cfg.name] + + # First-call: allocate scratch and pre-parse range dicts. + if not getattr(reset_root_state_uniform, "_is_warmed_up", False): + reset_root_state_uniform._scratch_pose = wp.zeros((env.num_envs,), dtype=wp.transformf, device=env.device) + reset_root_state_uniform._scratch_vel = wp.zeros((env.num_envs,), dtype=wp.spatial_vectorf, device=env.device) + # Pre-parse pose_range dict + p = [pose_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"]] + reset_root_state_uniform._pos_lo = wp.vec3f(p[0][0], p[1][0], p[2][0]) + reset_root_state_uniform._pos_hi = wp.vec3f(p[0][1], p[1][1], p[2][1]) + reset_root_state_uniform._rot_lo = wp.vec3f(p[3][0], p[4][0], p[5][0]) + reset_root_state_uniform._rot_hi = wp.vec3f(p[3][1], p[4][1], p[5][1]) + # Pre-parse velocity_range dict + v = [velocity_range.get(key, (0.0, 0.0)) for key in ["x", "y", "z", "roll", "pitch", "yaw"]] + reset_root_state_uniform._vel_lin_lo = wp.vec3f(v[0][0], v[1][0], v[2][0]) + reset_root_state_uniform._vel_lin_hi = wp.vec3f(v[0][1], v[1][1], v[2][1]) + reset_root_state_uniform._vel_ang_lo = wp.vec3f(v[3][0], v[4][0], v[5][0]) + reset_root_state_uniform._vel_ang_hi = wp.vec3f(v[3][1], v[4][1], v[5][1]) + reset_root_state_uniform._is_warmed_up = True + + wp.launch( + kernel=_reset_root_state_uniform_kernel, + dim=env.num_envs, + inputs=[ + env_mask, + env.rng_state_wp, + asset.data.default_root_pose, + asset.data.default_root_vel, + env.env_origins_wp, + reset_root_state_uniform._scratch_pose, + reset_root_state_uniform._scratch_vel, + reset_root_state_uniform._pos_lo, + reset_root_state_uniform._pos_hi, + reset_root_state_uniform._rot_lo, + reset_root_state_uniform._rot_hi, + reset_root_state_uniform._vel_lin_lo, + reset_root_state_uniform._vel_lin_hi, + reset_root_state_uniform._vel_ang_lo, + reset_root_state_uniform._vel_ang_hi, + ], + device=env.device, + ) + + asset.write_root_pose_to_sim_mask(root_pose=reset_root_state_uniform._scratch_pose, env_mask=env_mask) + asset.write_root_velocity_to_sim_mask(root_velocity=reset_root_state_uniform._scratch_vel, env_mask=env_mask) + + +# --------------------------------------------------------------------------- +# Reset joints by offset +# --------------------------------------------------------------------------- @wp.kernel @@ -124,6 +488,10 @@ def reset_joints_by_offset( device=env.device, ) + # Sync derived buffers (_previous_joint_vel, joint_acc) for reset envs. + asset.write_joint_position_to_sim_mask(position=asset.data.joint_pos, env_mask=env_mask) + asset.write_joint_velocity_to_sim_mask(velocity=asset.data.joint_vel, env_mask=env_mask) + @wp.kernel def _reset_joints_by_scale_kernel( @@ -210,3 +578,7 @@ def reset_joints_by_scale( ], device=env.device, ) + + # Sync derived buffers (_previous_joint_vel, joint_acc) for reset envs. + asset.write_joint_position_to_sim_mask(position=asset.data.joint_pos, env_mask=env_mask) + asset.write_joint_velocity_to_sim_mask(velocity=asset.data.joint_vel, env_mask=env_mask) diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/observations.py b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/observations.py index ae6b1998588d..29fdedfd98b0 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/observations.py +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/observations.py @@ -3,30 +3,39 @@ # # SPDX-License-Identifier: BSD-3-Clause -"""Warp-first observation terms (experimental, Cartpole-focused). +"""Warp-first observation terms (experimental). All functions in this file follow the Warp-compatible observation signature expected by the experimental Warp-first observation manager: - ``func(env, out, **params) -> None`` -where ``out`` is a pre-allocated Warp array with float32 dtype and shape ``(num_envs, term_dim)``. +where ``out`` is a pre-allocated Warp array with float32 dtype and shape ``(num_envs, D)``. +Output dimension ``D`` is inferred from decorator metadata: ``axes`` for root-state terms, +``out_dim`` for body/command/action/time terms, or ``joint_ids`` count for joint terms. """ from __future__ import annotations from typing import TYPE_CHECKING +import torch import warp as wp +from isaaclab_newton.kernels.state_kernels import ( + body_ang_vel_from_root, + body_lin_vel_from_root, + rotate_vec_to_body_frame, +) from isaaclab.assets import Articulation from isaaclab_experimental.envs.utils.io_descriptors import ( generic_io_descriptor_warp, + record_dtype, record_joint_names, record_joint_pos_offsets, - record_joint_shape, record_joint_vel_offsets, + record_shape, ) from isaaclab_experimental.managers import SceneEntityCfg @@ -34,21 +43,191 @@ from isaaclab.envs import ManagerBasedEnv +# --------------------------------------------------------------------------- +# Shared kernels +# --------------------------------------------------------------------------- + + @wp.kernel -def _joint_pos_rel_gather_kernel( - joint_pos: wp.array(dtype=wp.float32, ndim=2), - default_joint_pos: wp.array(dtype=wp.float32, ndim=2), +def _vec3_to_out3_kernel( + src: wp.array(dtype=wp.vec3f), + out: wp.array(dtype=wp.float32, ndim=2), +): + env_id = wp.tid() + v = src[env_id] + out[env_id, 0] = v[0] + out[env_id, 1] = v[1] + out[env_id, 2] = v[2] + + +@wp.kernel +def _joint_gather_kernel( + src: wp.array(dtype=wp.float32, ndim=2), + joint_ids: wp.array(dtype=wp.int32), + out: wp.array(dtype=wp.float32, ndim=2), +): + env_id, k = wp.tid() + j = joint_ids[k] + out[env_id, k] = src[env_id, j] + + +""" +Root state. +""" + + +@wp.kernel +def _base_pos_z_kernel( + root_pos_w: wp.array(dtype=wp.vec3f), + out: wp.array(dtype=wp.float32, ndim=2), +): + env_id = wp.tid() + out[env_id, 0] = root_pos_w[env_id][2] + + +@generic_io_descriptor_warp( + units="m", axes=["Z"], observation_type="RootState", on_inspect=[record_shape, record_dtype] +) +def base_pos_z(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Root height in the simulation world frame.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_base_pos_z_kernel, + dim=env.num_envs, + inputs=[asset.data.root_pos_w, out], + device=env.device, + ) + + +# Inline Tier 1 access: these observations derive body-frame quantities directly from +# root_link_pose_w (transformf) and root_com_vel_w (spatial_vectorf), avoiding the lazy +# TimestampedWarpBuffer properties which are not CUDA-graph-capturable. +# See GRAPH_CAPTURE_MIGRATION.md in isaaclab_newton for background. +# If ArticulationData Tier 2 lazy update is made graph-safe in the future, these can +# revert to reading the pre-computed .data buffers (simpler, avoids redundant rotations). + + +@wp.kernel +def _base_lin_vel_kernel( + root_pose_w: wp.array(dtype=wp.transformf), + root_vel_w: wp.array(dtype=wp.spatial_vectorf), + out: wp.array(dtype=wp.float32, ndim=2), +): + i = wp.tid() + v = body_lin_vel_from_root(root_pose_w[i], root_vel_w[i]) + out[i, 0] = v[0] + out[i, 1] = v[1] + out[i, 2] = v[2] + + +@generic_io_descriptor_warp( + units="m/s", axes=["X", "Y", "Z"], observation_type="RootState", on_inspect=[record_shape, record_dtype] +) +def base_lin_vel(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Root linear velocity in the asset's root frame.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_base_lin_vel_kernel, + dim=env.num_envs, + inputs=[asset.data.root_link_pose_w, asset.data.root_com_vel_w, out], + device=env.device, + ) + + +@wp.kernel +def _base_ang_vel_kernel( + root_pose_w: wp.array(dtype=wp.transformf), + root_vel_w: wp.array(dtype=wp.spatial_vectorf), + out: wp.array(dtype=wp.float32, ndim=2), +): + i = wp.tid() + v = body_ang_vel_from_root(root_pose_w[i], root_vel_w[i]) + out[i, 0] = v[0] + out[i, 1] = v[1] + out[i, 2] = v[2] + + +@generic_io_descriptor_warp( + units="rad/s", axes=["X", "Y", "Z"], observation_type="RootState", on_inspect=[record_shape, record_dtype] +) +def base_ang_vel(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Root angular velocity in the asset's root frame.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_base_ang_vel_kernel, + dim=env.num_envs, + inputs=[asset.data.root_link_pose_w, asset.data.root_com_vel_w, out], + device=env.device, + ) + + +@wp.kernel +def _projected_gravity_kernel( + root_pose_w: wp.array(dtype=wp.transformf), + gravity_w: wp.array(dtype=wp.vec3f), + out: wp.array(dtype=wp.float32, ndim=2), +): + i = wp.tid() + g = rotate_vec_to_body_frame(gravity_w[0], root_pose_w[i]) + out[i, 0] = g[0] + out[i, 1] = g[1] + out[i, 2] = g[2] + + +@generic_io_descriptor_warp( + units="m/s^2", axes=["X", "Y", "Z"], observation_type="RootState", on_inspect=[record_shape, record_dtype] +) +def projected_gravity(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Gravity projection on the asset's root frame.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_projected_gravity_kernel, + dim=env.num_envs, + inputs=[asset.data.root_link_pose_w, asset.data.GRAVITY_VEC_W, out], + device=env.device, + ) + + +""" +Joint state. +""" + + +@generic_io_descriptor_warp( + observation_type="JointState", on_inspect=[record_joint_names, record_dtype, record_shape], units="rad" +) +def joint_pos(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """The joint positions of the asset.""" + asset: Articulation = env.scene[asset_cfg.name] + joint_ids_wp = getattr(asset_cfg, "joint_ids_wp", None) + if joint_ids_wp is None: + raise RuntimeError( + "SceneEntityCfg.joint_ids_wp is required for subset joint observations in Warp-first observations. " + "Pass `asset_cfg` via term cfg params so it is resolved at manager init." + ) + wp.launch( + kernel=_joint_gather_kernel, + dim=(env.num_envs, out.shape[1]), + inputs=[asset.data.joint_pos, joint_ids_wp, out], + device=env.device, + ) + + +@wp.kernel +def _joint_rel_gather_kernel( + values: wp.array(dtype=wp.float32, ndim=2), + defaults: wp.array(dtype=wp.float32, ndim=2), joint_ids: wp.array(dtype=wp.int32), out: wp.array(dtype=wp.float32, ndim=2), ): env_id, k = wp.tid() j = joint_ids[k] - out[env_id, k] = joint_pos[env_id, j] - default_joint_pos[env_id, j] + out[env_id, k] = values[env_id, j] - defaults[env_id, j] @generic_io_descriptor_warp( observation_type="JointState", - on_inspect=[record_joint_names, record_joint_shape, record_joint_pos_offsets], + on_inspect=[record_joint_names, record_dtype, record_shape, record_joint_pos_offsets], units="rad", ) def joint_pos_rel(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: @@ -63,7 +242,7 @@ def joint_pos_rel(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEn "Pass `asset_cfg` via term cfg params so it is resolved at manager init." ) wp.launch( - kernel=_joint_pos_rel_gather_kernel, + kernel=_joint_rel_gather_kernel, dim=(env.num_envs, out.shape[1]), inputs=[asset.data.joint_pos, asset.data.default_joint_pos, joint_ids_wp, out], device=env.device, @@ -71,20 +250,62 @@ def joint_pos_rel(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEn @wp.kernel -def _joint_vel_rel_gather_kernel( - joint_vel: wp.array(dtype=wp.float32, ndim=2), - default_joint_vel: wp.array(dtype=wp.float32, ndim=2), +def _joint_pos_limit_normalized_kernel( + joint_pos: wp.array(dtype=wp.float32, ndim=2), + soft_joint_pos_limits: wp.array(dtype=wp.vec2f, ndim=2), joint_ids: wp.array(dtype=wp.int32), out: wp.array(dtype=wp.float32, ndim=2), ): env_id, k = wp.tid() j = joint_ids[k] - out[env_id, k] = joint_vel[env_id, j] - default_joint_vel[env_id, j] + pos = joint_pos[env_id, j] + lim = soft_joint_pos_limits[env_id, j] + lower = lim.x + upper = lim.y + out[env_id, k] = 2.0 * (pos - (lower + upper) * 0.5) / (upper - lower) + + +@generic_io_descriptor_warp(observation_type="JointState", on_inspect=[record_joint_names, record_dtype, record_shape]) +def joint_pos_limit_normalized(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """The joint positions of the asset normalized with the asset's joint limits.""" + asset: Articulation = env.scene[asset_cfg.name] + joint_ids_wp = getattr(asset_cfg, "joint_ids_wp", None) + if joint_ids_wp is None: + raise RuntimeError( + "SceneEntityCfg.joint_ids_wp is required for subset joint observations in Warp-first observations. " + "Pass `asset_cfg` via term cfg params so it is resolved at manager init." + ) + wp.launch( + kernel=_joint_pos_limit_normalized_kernel, + dim=(env.num_envs, out.shape[1]), + inputs=[asset.data.joint_pos, asset.data.soft_joint_pos_limits, joint_ids_wp, out], + device=env.device, + ) + + +@generic_io_descriptor_warp( + observation_type="JointState", on_inspect=[record_joint_names, record_dtype, record_shape], units="rad/s" +) +def joint_vel(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """The joint velocities of the asset.""" + asset: Articulation = env.scene[asset_cfg.name] + joint_ids_wp = getattr(asset_cfg, "joint_ids_wp", None) + if joint_ids_wp is None: + raise RuntimeError( + "SceneEntityCfg.joint_ids_wp is required for subset joint observations in Warp-first observations. " + "Pass `asset_cfg` via term cfg params so it is resolved at manager init." + ) + wp.launch( + kernel=_joint_gather_kernel, + dim=(env.num_envs, out.shape[1]), + inputs=[asset.data.joint_vel, joint_ids_wp, out], + device=env.device, + ) @generic_io_descriptor_warp( observation_type="JointState", - on_inspect=[record_joint_names, record_joint_shape, record_joint_vel_offsets], + on_inspect=[record_joint_names, record_dtype, record_shape, record_joint_vel_offsets], units="rad/s", ) def joint_vel_rel(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: @@ -99,8 +320,51 @@ def joint_vel_rel(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEn "Pass `asset_cfg` via term cfg params so it is resolved at manager init." ) wp.launch( - kernel=_joint_vel_rel_gather_kernel, + kernel=_joint_rel_gather_kernel, dim=(env.num_envs, out.shape[1]), inputs=[asset.data.joint_vel, asset.data.default_joint_vel, joint_ids_wp, out], device=env.device, ) + + +""" +Actions. +""" + + +@generic_io_descriptor_warp(out_dim="action", dtype=torch.float32, observation_type="Action", on_inspect=[record_shape]) +def last_action(env: ManagerBasedEnv, out, action_name: str | None = None) -> None: + """The last input action to the environment.""" + # TODO(warp-migration): Cross-manager access (observation → action). Currently works + # because experimental ActionManager.action is already a warp array. No from_torch needed. + if action_name is not None: + raise NotImplementedError("Named action support is not yet implemented for Warp-first last_action observation.") + wp.copy(out, env.action_manager.action) + + +""" +Commands. +""" + + +@generic_io_descriptor_warp( + out_dim="command", dtype=torch.float32, observation_type="Command", on_inspect=[record_shape] +) +def generated_commands(env: ManagerBasedEnv, out, command_name: str) -> None: + """The generated command from the command manager. Writes into ``out``. + + Warp-first override of :func:`isaaclab.envs.mdp.observations.generated_commands`. + Uses ``wp.from_torch`` to create a zero-copy warp view of the command tensor on first call. + """ + # TODO(warp-migration): Cross-manager access (observation → command). Replace with direct + # warp getter once all managers are guaranteed to be warp-native. + fn = generated_commands + if not getattr(fn, "_is_warmed_up", False) or fn._cmd_name != command_name: + cmd = env.command_manager.get_command(command_name) + if isinstance(cmd, wp.array): + fn._cmd_wp = cmd + else: + fn._cmd_wp = wp.from_torch(cmd) + fn._cmd_name = command_name + fn._is_warmed_up = True + wp.copy(out, fn._cmd_wp) diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/rewards.py b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/rewards.py index ef34627eb54d..b1a1a5ddfd93 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/rewards.py +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/rewards.py @@ -5,9 +5,6 @@ """Common functions that can be used to enable reward functions (experimental). -This module is intentionally minimal: it only contains reward terms that are currently -used by the experimental manager-based Cartpole task. - All functions in this file follow the Warp-compatible reward signature expected by `isaaclab_experimental.managers.RewardManager`: @@ -21,6 +18,11 @@ from typing import TYPE_CHECKING import warp as wp +from isaaclab_newton.kernels.state_kernels import ( + body_ang_vel_from_root, + body_lin_vel_from_root, + rotate_vec_to_body_frame, +) from isaaclab.assets import Articulation @@ -67,11 +69,116 @@ def is_terminated(env: ManagerBasedRLEnv, out) -> None: ) +""" +Root penalties. +""" + + +# Inline Tier 1 access: these rewards derive body-frame quantities directly from +# root_link_pose_w (transformf) and root_com_vel_w (spatial_vectorf), avoiding the lazy +# TimestampedWarpBuffer properties which are not CUDA-graph-capturable. +# See GRAPH_CAPTURE_MIGRATION.md in isaaclab_newton for background. +# If ArticulationData Tier 2 lazy update is made graph-safe in the future, these can +# revert to reading the pre-computed .data buffers (simpler, avoids redundant rotations). + + +@wp.kernel +def _lin_vel_z_l2_kernel( + root_pose_w: wp.array(dtype=wp.transformf), + root_vel_w: wp.array(dtype=wp.spatial_vectorf), + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + vz = body_lin_vel_from_root(root_pose_w[i], root_vel_w[i])[2] + out[i] = vz * vz + + +def lin_vel_z_l2(env: ManagerBasedRLEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Penalize z-axis base linear velocity using L2 squared kernel.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_lin_vel_z_l2_kernel, + dim=env.num_envs, + inputs=[asset.data.root_link_pose_w, asset.data.root_com_vel_w, out], + device=env.device, + ) + + +@wp.kernel +def _ang_vel_xy_l2_kernel( + root_pose_w: wp.array(dtype=wp.transformf), + root_vel_w: wp.array(dtype=wp.spatial_vectorf), + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + v = body_ang_vel_from_root(root_pose_w[i], root_vel_w[i]) + out[i] = v[0] * v[0] + v[1] * v[1] + + +def ang_vel_xy_l2(env: ManagerBasedRLEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Penalize xy-axis base angular velocity using L2 squared kernel.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_ang_vel_xy_l2_kernel, + dim=env.num_envs, + inputs=[asset.data.root_link_pose_w, asset.data.root_com_vel_w, out], + device=env.device, + ) + + +@wp.kernel +def _flat_orientation_l2_kernel( + root_pose_w: wp.array(dtype=wp.transformf), + gravity_w: wp.array(dtype=wp.vec3f), + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + g = rotate_vec_to_body_frame(gravity_w[0], root_pose_w[i]) + out[i] = g[0] * g[0] + g[1] * g[1] + + +def flat_orientation_l2(env: ManagerBasedRLEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Penalize non-flat base orientation using L2 squared kernel.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_flat_orientation_l2_kernel, + dim=env.num_envs, + inputs=[asset.data.root_link_pose_w, asset.data.GRAVITY_VEC_W, out], + device=env.device, + ) + + """ Joint penalties. """ +# TODO(warp-migration): Revisit whether 2D kernel + wp.atomic_add is faster than 1D with inner loop +# for the following masked reduction kernels. Profile with typical joint counts (12-30). +@wp.kernel +def _sum_sq_masked_kernel( + x: wp.array(dtype=wp.float32, ndim=2), joint_mask: wp.array(dtype=wp.bool), out: wp.array(dtype=wp.float32) +): + i = wp.tid() + s = float(0.0) + for j in range(x.shape[1]): + if joint_mask[j]: + s += x[i, j] * x[i, j] + out[i] = s + + +def joint_torques_l2(env: ManagerBasedRLEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Penalize joint torques applied on the articulation using L2 squared kernel.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_sum_sq_masked_kernel, + dim=env.num_envs, + inputs=[asset.data.applied_torque, asset_cfg.joint_mask, out], + device=env.device, + ) + + +# TODO(warp-migration): Revisit 2D kernel + wp.atomic_add vs 1D inner loop. @wp.kernel def _sum_abs_masked_kernel( x: wp.array(dtype=wp.float32, ndim=2), joint_mask: wp.array(dtype=wp.bool), out: wp.array(dtype=wp.float32) @@ -93,3 +200,291 @@ def joint_vel_l1(env: ManagerBasedRLEnv, out, asset_cfg: SceneEntityCfg) -> None inputs=[asset.data.joint_vel, asset_cfg.joint_mask, out], device=env.device, ) + + +def joint_vel_l2(env: ManagerBasedRLEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Penalize joint velocities on the articulation using L2 squared kernel.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_sum_sq_masked_kernel, + dim=env.num_envs, + inputs=[asset.data.joint_vel, asset_cfg.joint_mask, out], + device=env.device, + ) + + +def joint_acc_l2(env: ManagerBasedRLEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Penalize joint accelerations on the articulation using L2 squared kernel.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_sum_sq_masked_kernel, + dim=env.num_envs, + inputs=[asset.data.joint_acc, asset_cfg.joint_mask, out], + device=env.device, + ) + + +# TODO(warp-migration): Revisit 2D kernel + wp.atomic_add vs 1D inner loop. +@wp.kernel +def _sum_abs_diff_masked_kernel( + a: wp.array(dtype=wp.float32, ndim=2), + b: wp.array(dtype=wp.float32, ndim=2), + joint_mask: wp.array(dtype=wp.bool), + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + s = float(0.0) + for j in range(a.shape[1]): + if joint_mask[j]: + s += wp.abs(a[i, j] - b[i, j]) + out[i] = s + + +def joint_deviation_l1(env: ManagerBasedRLEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Penalize joint positions that deviate from the default one.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_sum_abs_diff_masked_kernel, + dim=env.num_envs, + inputs=[asset.data.joint_pos, asset.data.default_joint_pos, asset_cfg.joint_mask, out], + device=env.device, + ) + + +# TODO(warp-migration): Revisit 2D kernel + wp.atomic_add vs 1D inner loop. +@wp.kernel +def _joint_pos_limits_kernel( + joint_pos: wp.array(dtype=wp.float32, ndim=2), + soft_joint_pos_limits: wp.array(dtype=wp.vec2f, ndim=2), + joint_mask: wp.array(dtype=wp.bool), + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + s = float(0.0) + for j in range(joint_pos.shape[1]): + if joint_mask[j]: + pos = joint_pos[i, j] + lim = soft_joint_pos_limits[i, j] + lower = lim.x + upper = lim.y + # penalty for exceeding lower limit + below = lower - pos + if below > 0.0: + s += below + # penalty for exceeding upper limit + above = pos - upper + if above > 0.0: + s += above + out[i] = s + + +def joint_pos_limits(env: ManagerBasedRLEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Penalize joint positions if they cross the soft limits.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_joint_pos_limits_kernel, + dim=env.num_envs, + inputs=[asset.data.joint_pos, asset.data.soft_joint_pos_limits, asset_cfg.joint_mask, out], + device=env.device, + ) + + +""" +Action penalties. +""" + + +# TODO(warp-migration): Revisit 2D kernel + wp.atomic_add vs 1D inner loop. +@wp.kernel +def _sum_sq_diff_2d_kernel( + a: wp.array(dtype=wp.float32, ndim=2), + b: wp.array(dtype=wp.float32, ndim=2), + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + s = float(0.0) + for j in range(a.shape[1]): + d = a[i, j] - b[i, j] + s += d * d + out[i] = s + + +def action_rate_l2(env: ManagerBasedRLEnv, out) -> None: + """Penalize the rate of change of the actions using L2 squared kernel.""" + wp.launch( + kernel=_sum_sq_diff_2d_kernel, + dim=env.num_envs, + inputs=[env.action_manager.action, env.action_manager.prev_action, out], + device=env.device, + ) + + +# TODO(warp-migration): Revisit 2D kernel + wp.atomic_add vs 1D inner loop. +@wp.kernel +def _sum_sq_2d_kernel(x: wp.array(dtype=wp.float32, ndim=2), out: wp.array(dtype=wp.float32)): + i = wp.tid() + s = float(0.0) + for j in range(x.shape[1]): + s += x[i, j] * x[i, j] + out[i] = s + + +def action_l2(env: ManagerBasedRLEnv, out) -> None: + """Penalize the actions using L2 squared kernel.""" + wp.launch( + kernel=_sum_sq_2d_kernel, + dim=env.num_envs, + inputs=[env.action_manager.action, out], + device=env.device, + ) + + +""" +Contact sensor. +""" + + +@wp.kernel +def _undesired_contacts_kernel( + forces: wp.array(dtype=wp.vec3f, ndim=3), + body_ids: wp.array(dtype=wp.int32), + threshold: float, + out: wp.array(dtype=wp.float32), +): + """Count bodies where max-over-history contact force norm exceeds threshold.""" + i = wp.tid() + count = float(0.0) + for k in range(body_ids.shape[0]): + b = body_ids[k] + max_force = float(0.0) + for h in range(forces.shape[1]): + f = forces[i, h, b] + norm = wp.sqrt(f[0] * f[0] + f[1] * f[1] + f[2] * f[2]) + if norm > max_force: + max_force = norm + if max_force > threshold: + count += 1.0 + out[i] = count + + +def undesired_contacts(env: ManagerBasedRLEnv, out, threshold: float, sensor_cfg: SceneEntityCfg) -> None: + """Penalize undesired contacts as the number of violations above a threshold. Writes into ``out``. + + Warp-first override of :func:`isaaclab.envs.mdp.rewards.undesired_contacts`. + """ + contact_sensor = env.scene.sensors[sensor_cfg.name] + wp.launch( + kernel=_undesired_contacts_kernel, + dim=env.num_envs, + inputs=[contact_sensor.data.net_forces_w_history, sensor_cfg.body_ids_wp, threshold, out], + device=env.device, + ) + + +""" +Velocity-tracking rewards. +""" + + +@wp.kernel +def _track_lin_vel_xy_exp_kernel( + root_pose_w: wp.array(dtype=wp.transformf), + root_vel_w: wp.array(dtype=wp.spatial_vectorf), + command: wp.array(dtype=wp.float32, ndim=2), + std_sq_inv: float, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + v = body_lin_vel_from_root(root_pose_w[i], root_vel_w[i]) + dx = command[i, 0] - v[0] + dy = command[i, 1] - v[1] + error = dx * dx + dy * dy + out[i] = wp.exp(-error * std_sq_inv) + + +def track_lin_vel_xy_exp( + env: ManagerBasedRLEnv, + out, + std: float, + command_name: str, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +) -> None: + """Reward tracking of linear velocity commands (xy axes) using exponential kernel. Writes into ``out``. + + Warp-first override of :func:`isaaclab.envs.mdp.rewards.track_lin_vel_xy_exp`. + """ + asset: Articulation = env.scene[asset_cfg.name] + # cache the warp view of the command tensor on first call (zero-copy) + # TODO(warp-migration): Cross-manager access (reward → command). Replace with direct + # warp getter once all managers are guaranteed to be warp-native. + if not getattr(track_lin_vel_xy_exp, "_is_warmed_up", False) or track_lin_vel_xy_exp._cmd_name != command_name: + cmd = env.command_manager.get_command(command_name) + if isinstance(cmd, wp.array): + track_lin_vel_xy_exp._cmd_wp = cmd + else: + track_lin_vel_xy_exp._cmd_wp = wp.from_torch(cmd) + track_lin_vel_xy_exp._cmd_name = command_name + track_lin_vel_xy_exp._is_warmed_up = True + wp.launch( + kernel=_track_lin_vel_xy_exp_kernel, + dim=env.num_envs, + inputs=[ + asset.data.root_link_pose_w, + asset.data.root_com_vel_w, + track_lin_vel_xy_exp._cmd_wp, + 1.0 / (std * std), + out, + ], + device=env.device, + ) + + +@wp.kernel +def _track_ang_vel_z_exp_kernel( + root_pose_w: wp.array(dtype=wp.transformf), + root_vel_w: wp.array(dtype=wp.spatial_vectorf), + command: wp.array(dtype=wp.float32, ndim=2), + cmd_col: int, + std_sq_inv: float, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + dz = command[i, cmd_col] - body_ang_vel_from_root(root_pose_w[i], root_vel_w[i])[2] + out[i] = wp.exp(-dz * dz * std_sq_inv) + + +def track_ang_vel_z_exp( + env: ManagerBasedRLEnv, + out, + std: float, + command_name: str, + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +) -> None: + """Reward tracking of angular velocity commands (yaw) using exponential kernel. Writes into ``out``. + + Warp-first override of :func:`isaaclab.envs.mdp.rewards.track_ang_vel_z_exp`. + """ + asset: Articulation = env.scene[asset_cfg.name] + # TODO(warp-migration): Cross-manager access (reward → command). Replace with direct + # warp getter once all managers are guaranteed to be warp-native. + if not getattr(track_ang_vel_z_exp, "_is_warmed_up", False) or track_ang_vel_z_exp._cmd_name != command_name: + cmd = env.command_manager.get_command(command_name) + if isinstance(cmd, wp.array): + track_ang_vel_z_exp._cmd_wp = cmd + else: + track_ang_vel_z_exp._cmd_wp = wp.from_torch(cmd) + track_ang_vel_z_exp._cmd_name = command_name + track_ang_vel_z_exp._is_warmed_up = True + wp.launch( + kernel=_track_ang_vel_z_exp_kernel, + dim=env.num_envs, + inputs=[ + asset.data.root_link_pose_w, + asset.data.root_com_vel_w, + track_ang_vel_z_exp._cmd_wp, + 2, + 1.0 / (std * std), + out, + ], + device=env.device, + ) diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/terminations.py b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/terminations.py index a6b0cea4375e..9ea143e4e8f0 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/terminations.py +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/mdp/terminations.py @@ -5,9 +5,6 @@ """Common functions that can be used to activate terminations (experimental). -This module is intentionally minimal: it only contains termination terms that are currently -used by the experimental manager-based Cartpole task. - All functions in this file follow the Warp-compatible termination signature expected by `isaaclab_experimental.managers.TerminationManager`: @@ -30,6 +27,11 @@ from isaaclab.envs import ManagerBasedRLEnv +""" +MDP terminations. +""" + + @wp.kernel def _time_out_kernel( episode_length: wp.array(dtype=wp.int64), max_episode_length: wp.int64, out: wp.array(dtype=wp.bool) @@ -48,6 +50,39 @@ def time_out(env: ManagerBasedRLEnv, out) -> None: ) +""" +Root terminations. +""" + + +@wp.kernel +def _root_height_below_min_kernel( + root_pos_w: wp.array(dtype=wp.vec3f), + minimum_height: float, + out: wp.array(dtype=wp.bool), +): + i = wp.tid() + out[i] = root_pos_w[i][2] < minimum_height + + +def root_height_below_minimum( + env: ManagerBasedRLEnv, out, minimum_height: float, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot") +) -> None: + """Terminate when the asset's root height is below the minimum height.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_root_height_below_min_kernel, + dim=env.num_envs, + inputs=[asset.data.root_pos_w, minimum_height, out], + device=env.device, + ) + + +""" +Joint terminations. +""" + + @wp.kernel def _joint_pos_out_of_manual_limit_kernel( joint_pos: wp.array(dtype=wp.float32, ndim=2), @@ -56,15 +91,12 @@ def _joint_pos_out_of_manual_limit_kernel( upper: float, out: wp.array(dtype=wp.bool), ): - i = wp.tid() - violated = bool(False) - for j in range(joint_pos.shape[1]): - if joint_mask[j]: - v = joint_pos[i, j] - if v < lower or v > upper: - violated = True - break - out[i] = violated + """2D kernel (num_envs, num_joints). ``out`` is pre-zeroed; only writes True.""" + i, j = wp.tid() + if joint_mask[j]: + v = joint_pos[i, j] + if v < lower or v > upper: + out[i] = True def joint_pos_out_of_manual_limit( @@ -84,7 +116,46 @@ def joint_pos_out_of_manual_limit( ) wp.launch( kernel=_joint_pos_out_of_manual_limit_kernel, - dim=env.num_envs, + dim=(env.num_envs, asset.data.joint_pos.shape[1]), inputs=[asset.data.joint_pos, asset_cfg.joint_mask, bounds[0], bounds[1], out], device=env.device, ) + + +""" +Contact sensor. +""" + + +@wp.kernel +def _illegal_contact_kernel( + forces: wp.array(dtype=wp.vec3f, ndim=3), + body_ids: wp.array(dtype=wp.int32), + threshold: float, + out: wp.array(dtype=wp.bool), +): + """Terminate when any selected body's max-over-history contact force norm exceeds threshold.""" + i = wp.tid() + violated = bool(False) + for k in range(body_ids.shape[0]): + b = body_ids[k] + for h in range(forces.shape[1]): + f = forces[i, h, b] + norm = wp.sqrt(f[0] * f[0] + f[1] * f[1] + f[2] * f[2]) + if norm > threshold: + violated = True + out[i] = violated + + +def illegal_contact(env: ManagerBasedRLEnv, out, threshold: float, sensor_cfg: SceneEntityCfg) -> None: + """Terminate when the contact force on the sensor exceeds the force threshold. Writes into ``out``. + + Warp-first override of :func:`isaaclab.envs.mdp.terminations.illegal_contact`. + """ + contact_sensor = env.scene.sensors[sensor_cfg.name] + wp.launch( + kernel=_illegal_contact_kernel, + dim=env.num_envs, + inputs=[contact_sensor.data.net_forces_w_history, sensor_cfg.body_ids_wp, threshold, out], + device=env.device, + ) diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/utils/io_descriptors.py b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/io_descriptors.py index 47cbad370632..19b6530b0b0e 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/envs/utils/io_descriptors.py +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/io_descriptors.py @@ -59,10 +59,12 @@ def _make_descriptor(**kwargs: Any) -> GenericObservationIODescriptor: desc = GenericObservationIODescriptor(**known) # User defined extras are stored in the descriptor under the `extras` field desc.extras = extras + # ``out_dim`` is kept as a top-level attribute (not in extras) so the + # observation manager can read it without inspecting extras. + desc.out_dim = extras.pop("out_dim", None) return desc -# TODO(jichuanh): The exact usage is unclear and this need revisit # Decorator factory for Warp-first IO descriptors. def generic_io_descriptor_warp( _func: Callable[Concatenate[ManagerBasedEnv, P], R] | None = None, @@ -183,18 +185,38 @@ def wrapper(env: ManagerBasedEnv, *args: P.args, **kwargs: P.kwargs) -> R: def record_shape(output: wp.array | None, descriptor: GenericObservationIODescriptor, **kwargs) -> None: """Record the shape of the output buffer. - No-op when ``output`` is ``None`` (the typical case during Warp-first - inspection). Use a type-specific hook such as :func:`record_joint_shape` - to derive shape from config instead. + When ``output`` is not ``None`` (eager path), shape is read directly. + When ``output`` is ``None`` (Warp-first inspection), shape is derived from: + - ``descriptor.extras["axes"]`` for RootState observations, or + - ``asset_cfg.joint_ids`` for JointState observations. + + BodyState shape cannot be derived without calling the function (the per-body + feature size varies). In that case shape is left unset. Args: output: The pre-allocated output buffer, or ``None`` during inspection. descriptor: The descriptor to record the shape to. **kwargs: Additional keyword arguments. """ - if output is None: + if output is not None: + descriptor.shape = (output.shape[-1],) + return + # --- Warp-first fallback: derive shape without output --- + # 1) From axes metadata (RootState) + axes = descriptor.extras.get("axes") if descriptor.extras else None + if axes: + descriptor.shape = (len(axes),) return - descriptor.shape = (output.shape[-1],) + # 2) From asset_cfg for JointState + if descriptor.observation_type == "JointState": + asset_cfg = kwargs.get("asset_cfg") + if asset_cfg is not None: + asset: Articulation = kwargs["env"].scene[asset_cfg.name] + joint_ids = asset_cfg.joint_ids + if joint_ids == slice(None): + descriptor.shape = (len(asset.joint_names),) + else: + descriptor.shape = (len(joint_ids),) def record_dtype(output: wp.array | None, descriptor: GenericObservationIODescriptor, **kwargs) -> None: diff --git a/source/isaaclab_experimental/isaaclab_experimental/managers/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/managers/__init__.py index 62d3171d32a0..b4521b98434a 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/managers/__init__.py +++ b/source/isaaclab_experimental/isaaclab_experimental/managers/__init__.py @@ -11,10 +11,12 @@ from isaaclab.managers import * # noqa: F401,F403 -# Override the stable implementation with the experimental fork. from .action_manager import ActionManager # noqa: F401 from .command_manager import CommandManager # noqa: F401 from .event_manager import EventManager # noqa: F401 + +# Override the stable implementation with the experimental fork. +from .manager_base import ManagerTermBase # noqa: F401 from .manager_term_cfg import ObservationTermCfg, RewardTermCfg, TerminationTermCfg # noqa: F401 from .observation_manager import ObservationManager # noqa: F401 from .reward_manager import RewardManager # noqa: F401 diff --git a/source/isaaclab_experimental/isaaclab_experimental/managers/action_manager.py b/source/isaaclab_experimental/isaaclab_experimental/managers/action_manager.py index 78ef70a99b13..672af82bc8ab 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/managers/action_manager.py +++ b/source/isaaclab_experimental/isaaclab_experimental/managers/action_manager.py @@ -27,22 +27,8 @@ if TYPE_CHECKING: from isaaclab.envs import ManagerBasedEnv - -@wp.kernel -def _zero_masked_2d( - # input - mask: wp.array(dtype=wp.bool), - # input/output - data: wp.array(dtype=wp.float32, ndim=2), -): - """Zero rows of a 2D buffer where ``mask`` is True. - - Launched with dim = (num_envs, data.shape[1]). - """ - - env_id, j = wp.tid() - if mask[env_id]: - data[env_id, j] = 0.0 +# Shared kernel – imported from utils to avoid duplication. +from isaaclab_experimental.utils.warp.utils import zero_masked_2d as _zero_masked_2d class ActionTerm(ManagerTermBase): diff --git a/source/isaaclab_experimental/isaaclab_experimental/managers/event_manager.py b/source/isaaclab_experimental/isaaclab_experimental/managers/event_manager.py index 6adc9f055d5e..3b36613a2ac9 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/managers/event_manager.py +++ b/source/isaaclab_experimental/isaaclab_experimental/managers/event_manager.py @@ -311,21 +311,21 @@ def apply( self._apply_interval(float(dt)) return + # resolve the environment mask + if env_mask_wp is None: + if wp.get_device().is_capturing: + raise ValueError(f"Event mode '{mode}' requires the environment mask to be provided when capturing.") + env_mask_wp = self._env.resolve_env_mask(env_ids=env_ids) + if mode == "reset": if global_env_step_count is None: raise ValueError(f"Event mode '{mode}' requires the total number of environment steps to be provided.") - if env_mask_wp is None: - if wp.get_device().is_capturing: - raise ValueError( - f"Event mode '{mode}' requires the environment mask to be provided when capturing." - ) - env_mask_wp = self._env.resolve_env_mask(env_ids=env_ids) self._apply_reset(env_mask_wp, global_env_step_count) return - # other modes keep the stable convention (env_ids forwarded) + # other modes (startup, prestartup, custom) — env_mask forwarded for term_cfg in self._mode_term_cfgs[mode]: - term_cfg.func(self._env, env_ids, **term_cfg.params) + term_cfg.func(self._env, env_mask_wp, **term_cfg.params) def _apply_interval(self, dt: float) -> None: if self._env.rng_state_wp is None: diff --git a/source/isaaclab_experimental/isaaclab_experimental/managers/manager_base.py b/source/isaaclab_experimental/isaaclab_experimental/managers/manager_base.py index e4abb1fa2c74..d40920b23fe9 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/managers/manager_base.py +++ b/source/isaaclab_experimental/isaaclab_experimental/managers/manager_base.py @@ -28,6 +28,8 @@ import isaaclab.utils.string as string_utils from isaaclab.utils import class_to_dict, string_to_callable +from isaaclab_experimental.utils.warp import is_warp_capturable + from .manager_term_cfg import ManagerTermBaseCfg from .scene_entity_cfg import SceneEntityCfg @@ -401,6 +403,12 @@ def _resolve_common_term_cfg(self, term_name: str, term_cfg: ManagerTermBaseCfg, f" and optional parameters: {args_with_defaults}, but received: {term_params}." ) + # register non-capturable terms with the call switch for mode=2 fallback + if not is_warp_capturable(term_cfg.func): + switch = getattr(self._env, "_manager_call_switch", None) + if switch is not None: + switch.register_manager_capturability(type(self).__name__, False) + # process attributes at runtime # these properties are only resolvable once the simulation starts playing if self._env.sim.is_playing(): diff --git a/source/isaaclab_experimental/isaaclab_experimental/managers/observation_manager.py b/source/isaaclab_experimental/isaaclab_experimental/managers/observation_manager.py index f73f30e5a9b8..07287eb03fbc 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/managers/observation_manager.py +++ b/source/isaaclab_experimental/isaaclab_experimental/managers/observation_manager.py @@ -526,6 +526,13 @@ def compute_group(self, group_name: str, update_history: bool = False) -> torch. # TODO(jichuanh): This is not migrated yet. Need revisit. # Update the history buffer if observation term has history enabled if term_cfg.history_length > 0: + # circular buffer is not capture safe + if wp.get_device().is_capturing: + raise RuntimeError( + "Observation terms with history (circular buffer) are not CUDA-graph-capture-safe yet. " + "Disable history for observation terms used inside a captured graph, or restructure " + "the graph to exclude history-buffered terms." + ) circular_buffer = self._group_obs_term_history_buffer[group_name][term_name] if update_history: circular_buffer.append(wp.to_torch(term_cfg.out_wp)) @@ -703,9 +710,10 @@ def _prepare_terms(self): # noqa: C901 f" but received: {len(term_cfg.scale)}." ) - # cast the scale into torch tensor - term_cfg.scale = torch.tensor(term_cfg.scale, dtype=torch.float, device=self._env.device) - term_cfg.scale_wp = wp.from_torch(term_cfg.scale, dtype=wp.float32) + scale_vals = ( + term_cfg.scale if isinstance(term_cfg.scale, tuple) else [float(term_cfg.scale)] * obs_dims[1] + ) + term_cfg.scale_wp = wp.array(scale_vals, dtype=wp.float32, device=self._env.device) # prepare modifiers for each observation if term_cfg.modifiers is not None: @@ -842,16 +850,43 @@ def _prepare_terms(self): # noqa: C901 self._group_obs_term_history_buffer[group_name] = group_entry_history_buffer def _infer_term_dim_scalar(self, term_cfg: ObservationTermCfg) -> int: - """Infer (D,) using scalar scene info (no term execution).""" - # allow explicit override + """Infer observation output dimension (D,) using decorator metadata, scene info, or manager state. + + Resolution order: + 1. ``out_dim`` on the function's ``@generic_io_descriptor_warp`` decorator. + 2. ``axes`` on the decorator (e.g. ``axes=["X","Y","Z"]`` → dim 3). + 3. Explicit ``term_dim`` / ``out_dim`` / ``obs_dim`` in ``term_cfg.params`` (legacy). + 4. ``asset_cfg.joint_ids`` count (joint-based observations). + """ + # --- 1-2. Decorator metadata (preferred) --- + func = term_cfg.func + # Check for descriptor on the (possibly wrapped) function first, + # then fall back to unwrapping for class-based terms. + descriptor = getattr(func, "_descriptor", None) + if descriptor is None and hasattr(func, "__wrapped__"): + descriptor = getattr(func.__wrapped__, "_descriptor", None) + if descriptor is not None: + # 1. Explicit out_dim on decorator + out_dim = getattr(descriptor, "out_dim", None) + if out_dim is not None: + return self._resolve_out_dim(out_dim, term_cfg) + # 2. Derive from axes metadata + axes = descriptor.extras.get("axes") if descriptor.extras else None + if axes is not None: + return len(axes) + + # --- 3. Legacy explicit override in params --- for k in ("term_dim", "out_dim", "obs_dim"): if k in term_cfg.params: return int(term_cfg.params[k]) - # try explicit param first + + # --- 3. Joint-based fallback via asset_cfg --- asset_cfg = term_cfg.params.get("asset_cfg") if asset_cfg is None: - raise ValueError(f"Observation term '{term_cfg.params}' has no asset_cfg parameter.") - # resolve selection + raise ValueError( + f"Cannot infer output dimension for observation term '{getattr(func, '__name__', func)}'. " + "Add `out_dim=` to its @generic_io_descriptor_warp decorator." + ) asset = self._env.scene[asset_cfg.name] joint_ids_wp = getattr(asset_cfg, "joint_ids_wp", None) if joint_ids_wp is not None: @@ -860,3 +895,47 @@ def _infer_term_dim_scalar(self, term_cfg: ObservationTermCfg) -> int: if isinstance(joint_ids, slice): return int(getattr(asset, "num_joints", wp.to_torch(asset.data.joint_pos).shape[1])) return int(len(joint_ids)) + + def _resolve_out_dim(self, out_dim: int | str, term_cfg: ObservationTermCfg) -> int: + """Resolve an ``out_dim`` value from a decorator into a concrete integer. + + Supports: + - ``int``: returned as-is (fixed dimension). + - ``"joint"``: number of selected joints from ``asset_cfg``. + - ``"body:N"``: ``N`` components per selected body from ``asset_cfg``. + - ``"command"``: query ``command_manager.get_command(name).shape[-1]``. + - ``"action"``: query ``action_manager.action.shape[-1]``. + """ + if isinstance(out_dim, int): + return out_dim + + if out_dim == "joint": + asset_cfg = term_cfg.params.get("asset_cfg") + asset = self._env.scene[asset_cfg.name] + joint_ids_wp = getattr(asset_cfg, "joint_ids_wp", None) + if joint_ids_wp is not None: + return int(joint_ids_wp.shape[0]) + joint_ids = getattr(asset_cfg, "joint_ids", slice(None)) + if isinstance(joint_ids, slice): + return int(getattr(asset, "num_joints", wp.to_torch(asset.data.joint_pos).shape[1])) + return int(len(joint_ids)) + + if isinstance(out_dim, str) and out_dim.startswith("body:"): + per_body = int(out_dim.split(":")[1]) + asset_cfg = term_cfg.params.get("asset_cfg") + body_ids = getattr(asset_cfg, "body_ids", None) + if body_ids is None or body_ids == slice(None): + asset = self._env.scene[asset_cfg.name] + return per_body * len(asset.body_names) + return per_body * len(body_ids) + + if out_dim == "command": + command_name = term_cfg.params.get("command_name") + cmd = self._env.command_manager.get_command(command_name) + return int(cmd.shape[-1]) + + if out_dim == "action": + action = self._env.action_manager.action + return int(action.shape[-1]) + + raise ValueError(f"Unknown out_dim sentinel: {out_dim!r}") diff --git a/source/isaaclab_experimental/isaaclab_experimental/managers/scene_entity_cfg.py b/source/isaaclab_experimental/isaaclab_experimental/managers/scene_entity_cfg.py index 9f58cbe8ddfd..1930f6ce1cb7 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/managers/scene_entity_cfg.py +++ b/source/isaaclab_experimental/isaaclab_experimental/managers/scene_entity_cfg.py @@ -25,30 +25,36 @@ class SceneEntityCfg(_SceneEntityCfg): - `joint_mask` is intended for Warp kernels only. """ - """Boolean mask over all joints — used by warp kernels for masked writes.""" joint_mask: wp.array | None = None """Integer indices of selected joints — used for subset-sized gathers where a boolean mask cannot provide the mapping from output index k to joint index.""" joint_ids_wp: wp.array | None = None + """Integer indices of selected bodies — used for subset-sized body gathers.""" + body_ids_wp: wp.array | None = None + def resolve(self, scene: InteractiveScene): # run the stable resolution first (fills joint_ids/body_ids from names/regex) super().resolve(scene) - # Build a Warp joint mask for articulations only. entity = scene[self.name] - if not isinstance(entity, BaseArticulation): - return - - # Pre-allocate a full-length mask (all True for default selection). - if self.joint_ids == slice(None): - joint_ids_list = list(range(entity.num_joints)) - mask_list = [True] * entity.num_joints - else: - joint_ids_list = list(self.joint_ids) - mask_list = [False] * entity.num_joints - for idx in joint_ids_list: - mask_list[idx] = True - self.joint_mask = wp.array(mask_list, dtype=wp.bool, device=scene.device) - self.joint_ids_wp = wp.array(joint_ids_list, dtype=wp.int32, device=scene.device) + + # -- Warp joint mask / ids for articulations + if isinstance(entity, BaseArticulation): + if self.joint_ids == slice(None): + joint_ids_list = list(range(entity.num_joints)) + mask_list = [True] * entity.num_joints + else: + joint_ids_list = list(self.joint_ids) + mask_list = [False] * entity.num_joints + for idx in joint_ids_list: + mask_list[idx] = True + self.joint_mask = wp.array(mask_list, dtype=wp.bool, device=scene.device) + self.joint_ids_wp = wp.array(joint_ids_list, dtype=wp.int32, device=scene.device) + + # -- Warp body ids + if self.body_ids is not None and self.body_ids != slice(None): + self.body_ids_wp = wp.array(list(self.body_ids), dtype=wp.int32, device=scene.device) + elif hasattr(entity, "num_bodies"): + self.body_ids_wp = wp.array(list(range(entity.num_bodies)), dtype=wp.int32, device=scene.device) diff --git a/source/isaaclab_experimental/isaaclab_experimental/utils/manager_call_switch.py b/source/isaaclab_experimental/isaaclab_experimental/utils/manager_call_switch.py index 2f37b1087ca6..ad149900af2c 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/utils/manager_call_switch.py +++ b/source/isaaclab_experimental/isaaclab_experimental/utils/manager_call_switch.py @@ -46,6 +46,7 @@ class ManagerCallSwitch: "ObservationManager", "EventManager", "RecorderManager", + "CommandManager", "TerminationManager", "RewardManager", "CurriculumManager", @@ -166,17 +167,18 @@ def get_mode_for_manager(self, manager_name: str) -> ManagerCallMode: """Return the resolved execution mode for the given manager. Looks up the manager in the config dict, falls back to the default, - then caps by :attr:`MAX_MODE_OVERRIDES`. + then caps by :attr:`_max_modes` (static overrides + dynamic registrations). """ - mode_value = self._cfg.get(manager_name, self._cfg[self.DEFAULT_KEY]) + default_key = next(iter(self.DEFAULT_CONFIG)) + mode_value = self._cfg.get(manager_name, self._cfg[default_key]) cap = self._max_modes.get(manager_name) if cap is not None: mode_value = min(mode_value, cap) return ManagerCallMode(mode_value) - def resolve_manager_class(self, manager_name: str) -> type: + def resolve_manager_class(self, manager_name: str, mode_override: ManagerCallMode | int | None = None) -> type: """Import and return the manager class for the configured mode.""" - mode = self.get_mode_for_manager(manager_name) + mode = self.get_mode_for_manager(manager_name) if mode_override is None else ManagerCallMode(mode_override) module_name = "isaaclab.managers" if mode == ManagerCallMode.STABLE else "isaaclab_experimental.managers" module = importlib.import_module(module_name) if not hasattr(module, manager_name): @@ -206,8 +208,8 @@ def _run_call(self, call: dict[str, Any]) -> Any: def _wp_capture_or_launch(self, stage: str, call: dict[str, Any]) -> Any: """Capture Warp CUDA graph on first call, then replay. - Delegates to :class:`WarpGraphCache` which caches the return value - and replays immediately after the first capture for validation. + Delegates to :class:`WarpGraphCache` which handles warm-up, capture, + caching the return value, and replay. """ return self._graph_cache.capture_or_replay( stage, @@ -236,7 +238,7 @@ def _load_cfg(self, cfg_source: dict | str | None) -> dict[str, int]: else: raise TypeError(f"cfg_source must be a dict, string, or None, got: {type(cfg_source)}") - # Validation + # validation for manager_name, mode_value in cfg.items(): if not isinstance(mode_value, int): raise TypeError( @@ -249,7 +251,7 @@ def _load_cfg(self, cfg_source: dict | str | None) -> dict[str, int]: f"Invalid manager_call_config value for '{manager_name}': {mode_value}. Expected 0/1/2." ) from exc - # Apply MAX_MODE_OVERRIDES: bake caps into the resolved config so + # Apply max mode caps: bake caps into the resolved config so # get_mode_for_manager never needs per-call branching. default_mode = cfg[self.DEFAULT_KEY] for name, max_mode in self._max_modes.items(): diff --git a/source/isaaclab_experimental/isaaclab_experimental/utils/warp/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/utils/warp/__init__.py index a7e71ac46887..2d071b823a46 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/utils/warp/__init__.py +++ b/source/isaaclab_experimental/isaaclab_experimental/utils/warp/__init__.py @@ -6,4 +6,11 @@ """Warp utility functions and shared kernels for isaaclab_experimental.""" from .kernels import compute_reset_scale, count_masked -from .utils import WarpCapturable, resolve_1d_mask, wrap_to_pi +from .utils import ( + WarpCapturable, + is_warp_capturable, + resolve_1d_mask, + warp_capturable, + wrap_to_pi, + zero_masked_2d, +) diff --git a/source/isaaclab_experimental/isaaclab_experimental/utils/warp/utils.py b/source/isaaclab_experimental/isaaclab_experimental/utils/warp/utils.py index c7a2c63d9594..a4beb8627ef9 100644 --- a/source/isaaclab_experimental/isaaclab_experimental/utils/warp/utils.py +++ b/source/isaaclab_experimental/isaaclab_experimental/utils/warp/utils.py @@ -39,7 +39,7 @@ def resolve_1d_mask( Args: ids: Indices to set to ``True``. ``None`` or ``slice(None)`` means all. mask: Explicit boolean mask. If provided, returned directly (after - torch→warp normalization if needed). Takes precedence over *ids*. + torch->warp normalization if needed). Takes precedence over *ids*. all_mask: Pre-allocated all-True mask of shape ``(size,)``, returned when both *ids* and *mask* are ``None``. scratch_mask: Pre-allocated scratch mask of shape ``(size,)``, filled @@ -47,7 +47,7 @@ def resolve_1d_mask( device: Warp device string. Returns: - A ``wp.array(dtype=wp.bool)`` — ``mask``, ``all_mask``, or ``scratch_mask``. + A ``wp.array(dtype=wp.bool)`` -- ``mask``, ``all_mask``, or ``scratch_mask``. """ # Fast path: explicit mask provided. if mask is not None: @@ -97,6 +97,37 @@ def resolve_1d_mask( return scratch_mask +def warp_capturable(capturable: bool): + """Annotate an MDP term's CUDA-graph capturability. + + No-wrapper decorator: sets ``_warp_capturable`` directly on the function + and returns it unchanged. Safe to stack with any other decorator in any order. + + By default all MDP terms are assumed capturable (True). Use + ``@warp_capturable(False)`` on terms that call non-capturable external APIs. + """ + + def decorator(func): + func._warp_capturable = capturable + return func + + return decorator + + +def is_warp_capturable(func) -> bool: + """Check if a term function is CUDA-graph-capturable. + + Checks ``_warp_capturable`` on the function and its ``__wrapped__`` target. + Returns True (capturable) by default if no annotation is found. + """ + for f in (func, getattr(func, "__wrapped__", None)): + if f is not None: + val = getattr(f, "_warp_capturable", None) + if val is not None: + return val + return True + + @wp.func def wrap_to_pi(angle: float) -> float: """Wrap input angle (in radians) to the range [-pi, pi].""" @@ -163,3 +194,11 @@ def is_capturable(func) -> bool: if val is not None: return val return True + + +@wp.kernel +def zero_masked_2d(mask: wp.array(dtype=wp.bool), values: wp.array(dtype=wp.float32, ndim=2)): + """Zero out rows of a 2D float32 array where mask is True.""" + env_id, j = wp.tid() + if mask[env_id]: + values[env_id, j] = 0.0 diff --git a/source/isaaclab_experimental/test/envs/mdp/parity_helpers.py b/source/isaaclab_experimental/test/envs/mdp/parity_helpers.py new file mode 100644 index 000000000000..33a21907ee94 --- /dev/null +++ b/source/isaaclab_experimental/test/envs/mdp/parity_helpers.py @@ -0,0 +1,613 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Shared test utilities for MDP warp-vs-stable parity tests. + +Contains constants, assertion helpers, warp kernel runners, mock classes, +numpy math utilities, and mutation helpers used by the observation, reward, +termination, event, and action parity test files. +""" + +from __future__ import annotations + +import numpy as np +import torch +import warp as wp + +# --------------------------------------------------------------------------- +# Constants (shared across all MDP parity test files) +# --------------------------------------------------------------------------- +NUM_ENVS = 64 +NUM_JOINTS = 12 +NUM_ACTIONS = 6 +DEVICE = "cuda:0" +ATOL = 1e-5 +RTOL = 1e-5 + +# Body/sensor-level defaults shared by observation, reward, and termination tests +NUM_BODIES = 4 +NUM_HISTORY = 3 +CMD_DIM = 3 +BODY_IDS = [0, 2] + +# Gravity direction constant (normalized, same as ArticulationData.GRAVITY_VEC_W) +GRAVITY_DIR_NP = np.array([[0.0, 0.0, -1.0]], dtype=np.float32) + + +# --------------------------------------------------------------------------- +# Numpy math utilities +# --------------------------------------------------------------------------- + + +def quat_rotate_inv_np(q_xyzw: np.ndarray, v: np.ndarray) -> np.ndarray: + """Apply inverse quaternion rotation to vectors (numpy, batch). + + Equivalent to ``wp.quat_rotate_inv`` — rotates *v* by the conjugate of *q*. + + Args: + q_xyzw: (N, 4) quaternion array in [x, y, z, w] order (warp convention). + v: (N, 3) vector array. + + Returns: + (N, 3) rotated vectors in float32. + """ + qv = -q_xyzw[..., :3] # conjugate xyz + qw = q_xyzw[..., 3:4] + t = 2.0 * np.cross(qv, v) + return (v + qw * t + np.cross(qv, t)).astype(np.float32) + + +# --------------------------------------------------------------------------- +# Warp / numpy utilities +# --------------------------------------------------------------------------- + + +def copy_np_to_wp(dest: wp.array, src_np: np.ndarray): + """In-place overwrite of a warp array's contents from numpy (preserves pointer).""" + tmp = wp.array(src_np, dtype=dest.dtype, device=str(dest.device)) + wp.copy(dest, tmp) + + +# --------------------------------------------------------------------------- +# Test runner helpers +# --------------------------------------------------------------------------- + + +def run_warp_obs(func, env, shape, device=DEVICE, **kwargs): + """Run a warp observation function and return the result as a torch tensor.""" + out = wp.zeros(shape, dtype=wp.float32, device=device) + func(env, out, **kwargs) + return wp.to_torch(out).clone() + + +def run_warp_obs_captured(func, env, shape, device=DEVICE, **kwargs): + """Run a warp observation function under CUDA graph capture and return the result.""" + out = wp.zeros(shape, dtype=wp.float32, device=device) + func(env, out, **kwargs) # warm-up + with wp.ScopedCapture() as capture: + func(env, out, **kwargs) + wp.capture_launch(capture.graph) + return wp.to_torch(out).clone() + + +def run_warp_rew(func, env, device=DEVICE, **kwargs): + """Run a warp reward function and return the result as a torch tensor.""" + out = wp.zeros((NUM_ENVS,), dtype=wp.float32, device=device) + func(env, out, **kwargs) + return wp.to_torch(out).clone() + + +def run_warp_rew_captured(func, env, device=DEVICE, **kwargs): + """Run a warp reward function under CUDA graph capture.""" + out = wp.zeros((NUM_ENVS,), dtype=wp.float32, device=device) + func(env, out, **kwargs) # warm-up + with wp.ScopedCapture() as capture: + func(env, out, **kwargs) + wp.capture_launch(capture.graph) + return wp.to_torch(out).clone() + + +def run_warp_term(func, env, device=DEVICE, **kwargs): + """Run a warp termination function and return the result as a torch tensor.""" + out = wp.zeros((NUM_ENVS,), dtype=wp.bool, device=device) + func(env, out, **kwargs) + return wp.to_torch(out).clone() + + +def run_warp_term_captured(func, env, device=DEVICE, **kwargs): + """Run a warp termination function under CUDA graph capture.""" + out = wp.zeros((NUM_ENVS,), dtype=wp.bool, device=device) + func(env, out, **kwargs) # warm-up + with wp.ScopedCapture() as capture: + func(env, out, **kwargs) + wp.capture_launch(capture.graph) + return wp.to_torch(out).clone() + + +# --------------------------------------------------------------------------- +# Assertion helpers +# --------------------------------------------------------------------------- + + +def assert_close(actual: torch.Tensor, expected: torch.Tensor, atol: float = ATOL, rtol: float = RTOL): + torch.testing.assert_close(actual, expected, atol=atol, rtol=rtol) + + +def assert_equal(actual: torch.Tensor, expected: torch.Tensor): + assert torch.equal(actual, expected), f"Mismatch:\n actual: {actual}\n expected: {expected}" + + +# --------------------------------------------------------------------------- +# Mock classes (shared across parity test files) +# --------------------------------------------------------------------------- + + +class MockArticulationData: + """Mock articulation data backed by Warp arrays (same storage Newton uses). + + Args: + num_envs: Number of environments. + num_joints: Number of joints. + device: Warp device string. + seed: Random seed for reproducibility. + num_bodies: Number of bodies. When > 0, generates body-level arrays + (body_pose_w, body_lin_acc_w, body_com_pos_b) and multi-body + projected_gravity_b. When 0, projected_gravity_b is root-level + (derived from root quaternion). + """ + + def __init__(self, num_envs=NUM_ENVS, num_joints=NUM_JOINTS, device=DEVICE, seed=42, num_bodies=0): + rng = np.random.RandomState(seed) + + # --- Joint state (float32 2D) --- + self.joint_pos = wp.array(rng.randn(num_envs, num_joints).astype(np.float32), device=device) + self.joint_vel = wp.array(rng.randn(num_envs, num_joints).astype(np.float32) * 2.0, device=device) + self.joint_acc = wp.array(rng.randn(num_envs, num_joints).astype(np.float32) * 0.5, device=device) + self.default_joint_pos = wp.array(rng.randn(num_envs, num_joints).astype(np.float32) * 0.01, device=device) + self.default_joint_vel = wp.array(np.zeros((num_envs, num_joints), dtype=np.float32), device=device) + self.applied_torque = wp.array(rng.randn(num_envs, num_joints).astype(np.float32) * 10.0, device=device) + self.computed_torque = wp.array(rng.randn(num_envs, num_joints).astype(np.float32) * 10.0, device=device) + + # --- Soft joint limits --- + limits_np = np.zeros((num_envs, num_joints, 2), dtype=np.float32) + limits_np[:, :, 0] = -3.14 + limits_np[:, :, 1] = 3.14 + self.soft_joint_pos_limits = wp.array(limits_np, dtype=wp.vec2f, device=device) + self.soft_joint_vel_limits = wp.array(np.full((num_envs, num_joints), 10.0, dtype=np.float32), device=device) + + # --- Root state --- + root_pos_np = rng.randn(num_envs, 3).astype(np.float32) + root_pos_np[:, 2] = np.abs(root_pos_np[:, 2]) + 0.1 # positive heights + self.root_pos_w = wp.array(root_pos_np, dtype=wp.vec3f, device=device) + + # Unit quaternions + quat_np = rng.randn(num_envs, 4).astype(np.float32) + quat_np /= np.linalg.norm(quat_np, axis=1, keepdims=True) + self.root_quat_w = wp.array(quat_np, dtype=wp.quatf, device=device) + + # Tier 1 compound: root_link_pose_w (transformf = pos + quat) + pose_np = np.zeros((num_envs, 7), dtype=np.float32) + pose_np[:, :3] = root_pos_np + pose_np[:, 3:] = quat_np + self.root_link_pose_w = wp.array(pose_np, dtype=wp.transformf, device=device) + + # World-frame velocities + lin_vel_w_np = rng.randn(num_envs, 3).astype(np.float32) + ang_vel_w_np = rng.randn(num_envs, 3).astype(np.float32) + self.root_lin_vel_w = wp.array(lin_vel_w_np, dtype=wp.vec3f, device=device) + self.root_ang_vel_w = wp.array(ang_vel_w_np, dtype=wp.vec3f, device=device) + + # Tier 1 compound: root_com_vel_w (spatial_vectorf: top=linear, bottom=angular) + vel_np = np.zeros((num_envs, 6), dtype=np.float32) + vel_np[:, :3] = lin_vel_w_np + vel_np[:, 3:] = ang_vel_w_np + self.root_com_vel_w = wp.array(vel_np, dtype=wp.spatial_vectorf, device=device) + + # Gravity direction constant (1D array to match kernel signatures) + self.GRAVITY_VEC_W = wp.array([0.0, 0.0, -1.0], dtype=wp.vec3f, device=device) + + # Derived body-frame quantities (consistent with Tier 1 compounds) + self.root_lin_vel_b = wp.array(quat_rotate_inv_np(quat_np, lin_vel_w_np), dtype=wp.vec3f, device=device) + self.root_ang_vel_b = wp.array(quat_rotate_inv_np(quat_np, ang_vel_w_np), dtype=wp.vec3f, device=device) + + # --- projected_gravity_b and body-level data --- + if num_bodies > 0: + # Multi-body projected_gravity_b: (num_envs, num_bodies) vec3f + grav_np = rng.randn(num_envs, num_bodies, 3).astype(np.float32) + grav_np[:, :, 2] = -1.0 + grav_np /= np.linalg.norm(grav_np, axis=2, keepdims=True) + self.projected_gravity_b = wp.array(grav_np, dtype=wp.vec3f, device=device) + + # body_pose_w: (num_envs, num_bodies) transformf + bpose_np = np.zeros((num_envs, num_bodies, 7), dtype=np.float32) + bpose_np[:, :, :3] = rng.randn(num_envs, num_bodies, 3).astype(np.float32) + bpose_np[:, :, 3:7] = [0.0, 0.0, 0.0, 1.0] + self.body_pose_w = wp.array(bpose_np, dtype=wp.transformf, device=device) + + # body_lin_acc_w: (num_envs, num_bodies) vec3f + self.body_lin_acc_w = wp.array( + rng.randn(num_envs, num_bodies, 3).astype(np.float32), dtype=wp.vec3f, device=device + ) + + # body_com_pos_b: (num_envs, num_bodies) vec3f + self.body_com_pos_b = wp.array( + rng.randn(num_envs, num_bodies, 3).astype(np.float32) * 0.01, dtype=wp.vec3f, device=device + ) + else: + # Root-level projected_gravity_b: (num_envs,) vec3f — derived from root quat + self.projected_gravity_b = wp.array( + quat_rotate_inv_np(quat_np, np.tile(GRAVITY_DIR_NP, (num_envs, 1))), + dtype=wp.vec3f, + device=device, + ) + + # --- Event-specific data --- + self.root_vel_w = wp.array(rng.randn(num_envs, 6).astype(np.float32), dtype=wp.spatial_vectorf, device=device) + + default_pose_np = np.zeros((num_envs, 7), dtype=np.float32) + default_pose_np[:, 0:3] = rng.randn(num_envs, 3).astype(np.float32) * 0.1 + default_pose_np[:, 3:7] = [0.0, 0.0, 0.0, 1.0] + self.default_root_pose = wp.array(default_pose_np, dtype=wp.transformf, device=device) + + self.default_root_vel = wp.array( + np.zeros((num_envs, 6), dtype=np.float32), dtype=wp.spatial_vectorf, device=device + ) + + def resolve_joint_mask(self, joint_ids=None): + n = self.joint_pos.shape[1] + mask = [False] * n + if joint_ids is None or isinstance(joint_ids, slice): + mask = [True] * n + else: + for j in joint_ids: + mask[j] = True + return wp.array(mask, dtype=wp.bool, device=str(self.joint_pos.device)) + + +class MockWrenchComposer: + """Mock wrench composer with no-op methods.""" + + def set_forces_and_torques_mask(self, *a, **kw): + pass + + +class MockArticulation: + """Mock articulation asset with simulation write stubs. + + Provides both no-op write stubs (for event tests) and tracking write stubs + (for action tests). The ``last_*_target`` attributes record the most recent + values passed to ``set_joint_*_target``, enabling verification in action tests. + """ + + def __init__(self, data: MockArticulationData, num_bodies: int = 1, num_joints: int = NUM_JOINTS): + self.data = data + self.num_bodies = num_bodies + self.num_joints = num_joints + self.device = DEVICE + self._joint_names = [f"joint_{i}" for i in range(num_joints)] + self.permanent_wrench_composer = MockWrenchComposer() + # Tracking attributes for action tests + self.last_pos_target = None + self.last_vel_target = None + self.last_effort_target = None + self.last_joint_mask = None + + # -- Simulation write stubs (no-op, for event tests) -------------------- + + def write_root_velocity_to_sim(self, *a, **kw): + pass + + def write_root_velocity_to_sim_mask(self, *a, **kw): + pass + + def write_root_pose_to_sim(self, *a, **kw): + pass + + def write_root_pose_to_sim_mask(self, *a, **kw): + pass + + def write_joint_state_to_sim(self, *a, **kw): + pass + + def write_joint_position_to_sim_mask(self, *a, **kw): + pass + + def write_joint_velocity_to_sim_mask(self, *a, **kw): + pass + + def set_external_force_and_torque(self, *a, **kw): + pass + + # -- Action write stubs (tracking, for action tests) -------------------- + + def set_joint_position_target(self, target, joint_ids=None, joint_mask=None): + self.last_pos_target = target + self.last_joint_mask = joint_mask + + def set_joint_velocity_target(self, target, joint_ids=None, joint_mask=None): + self.last_vel_target = target + self.last_joint_mask = joint_mask + + def set_joint_effort_target(self, target, joint_ids=None, joint_mask=None): + self.last_effort_target = target + self.last_joint_mask = joint_mask + + def set_joint_position_target_index(self, target, joint_ids=None): + self.last_pos_target = target + + def set_joint_effort_target_index(self, target, joint_ids=None): + self.last_effort_target = target + + # -- Query stubs -------------------------------------------------------- + + def find_joints(self, names, preserve_order=False): + if isinstance(names, list) and names == [".*"]: + return list(range(self.num_joints)), list(self._joint_names) + ids = [] + resolved = [] + for name in names if isinstance(names, list) else [names]: + for i, jn in enumerate(self._joint_names): + if (name in jn or name == jn or name == ".*") and i not in ids: + ids.append(i) + resolved.append(jn) + if not ids: + ids = list(range(self.num_joints)) + resolved = list(self._joint_names) + return ids, resolved + + def find_bodies(self, name): + return [0], [name] + + +class MockScene: + """Mock scene with asset lookup, env origins, and optional sensors.""" + + def __init__(self, assets: dict, env_origins, sensors=None): + self._assets = assets + self.env_origins = env_origins + self.sensors = sensors or {} + self.articulations = dict(assets) + self.rigid_objects = {} + self.num_envs = NUM_ENVS + + def __getitem__(self, name: str): + return self._assets[name] + + +# --------------------------------------------------------------------------- +# Root-state mutation helper +# --------------------------------------------------------------------------- + + +def mutate_root_state(rng: np.random.RandomState, art_data: MockArticulationData, num_envs: int = NUM_ENVS): + """Mutate root-level state arrays in-place (preserves buffer pointers). + + Updates root_pos_w, root_quat_w, root_link_pose_w, root_com_vel_w, + root_lin_vel_w, root_ang_vel_w, root_lin_vel_b, root_ang_vel_b, and + (when 1D) projected_gravity_b — all consistently derived from a fresh + random quaternion and world-frame velocities. + """ + root_pos_np = rng.randn(num_envs, 3).astype(np.float32) + root_pos_np[:, 2] = np.abs(root_pos_np[:, 2]) + 0.05 + copy_np_to_wp(art_data.root_pos_w, root_pos_np) + + quat_np = rng.randn(num_envs, 4).astype(np.float32) + quat_np /= np.linalg.norm(quat_np, axis=1, keepdims=True) + copy_np_to_wp(art_data.root_quat_w, quat_np) + + pose_np = np.zeros((num_envs, 7), dtype=np.float32) + pose_np[:, :3] = root_pos_np + pose_np[:, 3:] = quat_np + copy_np_to_wp(art_data.root_link_pose_w, pose_np) + + lin_vel_w_np = rng.randn(num_envs, 3).astype(np.float32) + ang_vel_w_np = rng.randn(num_envs, 3).astype(np.float32) + copy_np_to_wp(art_data.root_lin_vel_w, lin_vel_w_np) + copy_np_to_wp(art_data.root_ang_vel_w, ang_vel_w_np) + + vel_np = np.zeros((num_envs, 6), dtype=np.float32) + vel_np[:, :3] = lin_vel_w_np + vel_np[:, 3:] = ang_vel_w_np + copy_np_to_wp(art_data.root_com_vel_w, vel_np) + + copy_np_to_wp(art_data.root_lin_vel_b, quat_rotate_inv_np(quat_np, lin_vel_w_np)) + copy_np_to_wp(art_data.root_ang_vel_b, quat_rotate_inv_np(quat_np, ang_vel_w_np)) + + # Root-level projected_gravity_b (1D) is derived from quat. + # Multi-body (2D) is mutated separately by callers. + if art_data.projected_gravity_b.ndim == 1: + copy_np_to_wp( + art_data.projected_gravity_b, + quat_rotate_inv_np(quat_np, np.tile(GRAVITY_DIR_NP, (num_envs, 1))), + ) + + +class MockActionManagerWarp: + """Returns warp arrays (for experimental functions).""" + + def __init__(self, action_wp: wp.array, prev_action_wp: wp.array): + self._action = action_wp + self._prev_action = prev_action_wp + + @property + def action(self) -> wp.array: + return self._action + + @property + def prev_action(self) -> wp.array: + return self._prev_action + + +class MockActionManagerTorch: + """Returns torch tensors (for stable functions).""" + + def __init__(self, action_wp: wp.array, prev_action_wp: wp.array): + self._action = wp.to_torch(action_wp) + self._prev_action = wp.to_torch(prev_action_wp) + + @property + def action(self) -> torch.Tensor: + return self._action + + @property + def prev_action(self) -> torch.Tensor: + return self._prev_action + + +# --------------------------------------------------------------------------- +# Shared mock classes (previously duplicated across test files) +# --------------------------------------------------------------------------- + + +class MockSceneEntityCfg: + """Unified cfg that works for both stable (joint_ids) and experimental (joint_mask / joint_ids_wp).""" + + def __init__(self, name: str, joint_ids: list[int], num_joints: int, device: str): + self.name = name + self.joint_ids = joint_ids + + # Experimental extras + mask = [False] * num_joints + for idx in joint_ids: + mask[idx] = True + self.joint_mask = wp.array(mask, dtype=wp.bool, device=device) + self.joint_ids_wp = wp.array(joint_ids, dtype=wp.int32, device=device) + + +class MockContactSensorData: + """Mock contact sensor data with random force history. + + Stores ``net_forces_w_history`` as a warp ``vec3f`` 3D array of shape + ``(num_envs, num_history, num_bodies)``. Both warp kernels (which read + the warp array directly) and stable functions (which call + ``wp.to_torch``) work with this representation. + """ + + def __init__(self, num_envs=NUM_ENVS, num_history=NUM_HISTORY, num_bodies=NUM_BODIES, device=DEVICE, seed=77): + rng = np.random.RandomState(seed) + self.net_forces_w_history = wp.array( + rng.randn(num_envs, num_history, num_bodies, 3).astype(np.float32), + dtype=wp.vec3f, + device=device, + ) + + +class MockContactSensor: + """Mock contact sensor wrapping :class:`MockContactSensorData`.""" + + def __init__(self, data: MockContactSensorData, num_bodies: int = NUM_BODIES): + self.data = data + self.num_bodies = num_bodies + + +class MockCommandTerm: + """Mock command term with time_left and command_counter.""" + + def __init__(self, num_envs=NUM_ENVS, device=DEVICE, seed=88): + rng = np.random.RandomState(seed) + self.time_left = torch.tensor(rng.rand(num_envs).astype(np.float32) * 0.05, device=device) + self.command_counter = torch.tensor(rng.randint(0, 3, (num_envs,)), dtype=torch.float32, device=device) + + +class MockCommandManager: + """Mock command manager returning a fixed command tensor and term.""" + + def __init__(self, command_tensor: torch.Tensor, cmd_term: MockCommandTerm): + self._cmd = command_tensor + self._term = cmd_term + + def get_command(self, name: str) -> torch.Tensor: + return self._cmd + + def get_term(self, name: str): + return self._term + + +class MockBodyCfg: + """SceneEntityCfg-like object for body-level reward/termination terms.""" + + def __init__(self, name="robot", body_ids=None): + self.name = name + self.body_ids = body_ids if body_ids is not None else list(BODY_IDS) + + +class MockSensorCfg: + """SceneEntityCfg-like object for contact sensor terms. + + Provides both ``body_ids`` (for stable functions) and ``body_ids_wp`` + (for experimental warp functions). + """ + + def __init__(self, name="contact_sensor", body_ids=None, device=DEVICE): + self.name = name + self.body_ids = body_ids if body_ids is not None else list(BODY_IDS) + self.body_ids_wp = wp.array(self.body_ids, dtype=wp.int32, device=device) + + +class MockTerminationManager: + """Mock termination manager providing both torch and warp terminated buffers.""" + + def __init__(self, num_envs=NUM_ENVS, device=DEVICE): + self.terminated = torch.zeros(num_envs, dtype=torch.bool, device=device) + self.terminated_wp = wp.from_torch(self.terminated) + + +# --------------------------------------------------------------------------- +# Art-data mutation helpers (previously duplicated in obs/rew/term test files) +# --------------------------------------------------------------------------- + + +def mutate_art_data( + art_data: MockArticulationData, + warp_env, + num_envs: int = NUM_ENVS, + num_joints: int = NUM_JOINTS, + num_actions: int = NUM_ACTIONS, + rng_seed: int = 200, +): + """Mutate every data array in-place so captured graphs see fresh values.""" + rng = np.random.RandomState(rng_seed) + + copy_np_to_wp(art_data.joint_pos, rng.randn(num_envs, num_joints).astype(np.float32) * 1.5) + copy_np_to_wp(art_data.joint_vel, rng.randn(num_envs, num_joints).astype(np.float32) * 3.0) + copy_np_to_wp(art_data.joint_acc, rng.randn(num_envs, num_joints).astype(np.float32) * 0.8) + copy_np_to_wp(art_data.default_joint_pos, rng.randn(num_envs, num_joints).astype(np.float32) * 0.02) + copy_np_to_wp(art_data.applied_torque, rng.randn(num_envs, num_joints).astype(np.float32) * 12.0) + copy_np_to_wp(art_data.computed_torque, rng.randn(num_envs, num_joints).astype(np.float32) * 12.0) + + mutate_root_state(rng, art_data, num_envs) + + copy_np_to_wp(warp_env.action_manager._action, rng.randn(num_envs, num_actions).astype(np.float32)) + copy_np_to_wp(warp_env.action_manager._prev_action, rng.randn(num_envs, num_actions).astype(np.float32)) + + warp_env.episode_length_buf[:] = torch.randint(0, 500, (num_envs,), dtype=torch.int64, device=DEVICE) + + wp.synchronize() + + +def mutate_body_data( + art_data: MockArticulationData, + num_envs: int = NUM_ENVS, + num_bodies: int = NUM_BODIES, + rng_seed: int = 200, +): + """Mutate body-level and root-level data in-place so captured graphs see fresh values.""" + rng = np.random.RandomState(rng_seed) + + mutate_root_state(rng, art_data, num_envs) + + grav_np = rng.randn(num_envs, num_bodies, 3).astype(np.float32) + grav_np[:, :, 2] = -1.0 + grav_np /= np.linalg.norm(grav_np, axis=2, keepdims=True) + copy_np_to_wp(art_data.projected_gravity_b, grav_np) + + copy_np_to_wp(art_data.body_lin_acc_w, rng.randn(num_envs, num_bodies, 3).astype(np.float32)) + + pose_np = np.zeros((num_envs, num_bodies, 7), dtype=np.float32) + pose_np[:, :, :3] = rng.randn(num_envs, num_bodies, 3).astype(np.float32) + pose_np[:, :, 3:7] = [0.0, 0.0, 0.0, 1.0] + copy_np_to_wp(art_data.body_pose_w, pose_np) + + wp.synchronize() diff --git a/source/isaaclab_experimental/test/envs/mdp/test_actions_warp_parity.py b/source/isaaclab_experimental/test/envs/mdp/test_actions_warp_parity.py new file mode 100644 index 000000000000..237632efa84c --- /dev/null +++ b/source/isaaclab_experimental/test/envs/mdp/test_actions_warp_parity.py @@ -0,0 +1,227 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Parity tests for warp-first action MDP terms.""" + +from __future__ import annotations + +import numpy as np +import pytest +import torch +import warp as wp + +wp.init() +pytestmark = pytest.mark.skipif(not wp.is_cuda_available(), reason="CUDA device required") + +from isaaclab_experimental.envs.mdp.actions import ( + JointEffortAction, + JointEffortActionCfg, + JointPositionAction, + JointPositionActionCfg, +) +from parity_helpers import MockArticulation, MockArticulationData, MockScene, copy_np_to_wp + +NUM_ENVS = 32 +NUM_JOINTS = 6 +NUM_BODIES = 3 +DEVICE = "cuda:0" +ATOL = 1e-5 +RTOL = 1e-5 +JOINT_NAMES = [f"joint_{i}" for i in range(NUM_JOINTS)] + + +# ============================================================================ +# Mock infrastructure +# ============================================================================ + + +class MockEnv: + def __init__(self, asset): + self.scene = MockScene({"robot": asset}, env_origins=None) + self.num_envs = NUM_ENVS + self.device = DEVICE + + +# ============================================================================ +# Fixtures +# ============================================================================ + + +@pytest.fixture() +def art_data(): + data = MockArticulationData(num_envs=NUM_ENVS, num_joints=NUM_JOINTS, num_bodies=NUM_BODIES) + # Override defaults with specific per-joint values for action tests + copy_np_to_wp( + data.default_joint_pos, + np.tile([0.1, 0.2, 0.3, 0.4, 0.5, 0.6], (NUM_ENVS, 1)).astype(np.float32), + ) + # Body quaternion for NonHolonomicAction (identity = [0,0,0,1] in xyzw) + quat_np = np.zeros((NUM_ENVS, NUM_BODIES, 4), dtype=np.float32) + quat_np[:, :, 3] = 1.0 + data.body_quat_w = wp.array(quat_np, dtype=wp.quatf, device=DEVICE) + data._num_joints = NUM_JOINTS + return data + + +@pytest.fixture() +def asset(art_data): + return MockArticulation(art_data, num_bodies=NUM_BODIES, num_joints=NUM_JOINTS) + + +@pytest.fixture() +def env(asset): + return MockEnv(asset) + + +@pytest.fixture() +def actions_wp(): + rng = np.random.RandomState(99) + return wp.array(rng.randn(NUM_ENVS, NUM_JOINTS).astype(np.float32), device=DEVICE) + + +# ============================================================================ +# Helpers +# ============================================================================ + + +def assert_close(actual, expected, atol=ATOL, rtol=RTOL): + if isinstance(actual, wp.array): + actual = wp.to_torch(actual) + if isinstance(expected, wp.array): + expected = wp.to_torch(expected) + torch.testing.assert_close(actual.float(), expected.float(), atol=atol, rtol=rtol) + + +# ============================================================================ +# Joint action tests (JointPosition, JointEffort) +# ============================================================================ + + +class TestJointActions: + """Test JointAction subclasses: process, apply, reset.""" + + def test_joint_effort_process_apply(self, env, asset, actions_wp): + cfg = JointEffortActionCfg(asset_name="robot", joint_names=[".*"]) + term = JointEffortAction(cfg, env) + + term.process_actions(actions_wp, action_offset=0) + term.apply_actions() + + # Processed = raw * scale(1.0) + offset(0.0) = raw + assert_close(term.processed_actions, actions_wp) + assert asset.last_effort_target is not None + + def test_joint_position_default_offset(self, env, asset, art_data, actions_wp): + cfg = JointPositionActionCfg(asset_name="robot", joint_names=[".*"], use_default_offset=True) + term = JointPositionAction(cfg, env) + + term.process_actions(actions_wp, action_offset=0) + term.apply_actions() + + # Processed = raw * 1.0 + default_joint_pos[0] + defaults = wp.to_torch(art_data.default_joint_pos)[0] + raw = wp.to_torch(actions_wp) + expected = raw + defaults.unsqueeze(0) + assert_close(term.processed_actions, expected) + assert asset.last_pos_target is not None + + def test_joint_action_reset(self, env, asset, actions_wp): + cfg = JointEffortActionCfg(asset_name="robot", joint_names=[".*"]) + term = JointEffortAction(cfg, env) + + # Process some actions + term.process_actions(actions_wp, action_offset=0) + assert wp.to_torch(term.raw_actions).abs().sum() > 0 + + # Reset all + term.reset(env_mask=None) + assert_close(term.raw_actions, wp.zeros_like(term.raw_actions)) + + def test_joint_action_reset_masked(self, env, asset, actions_wp): + cfg = JointEffortActionCfg(asset_name="robot", joint_names=[".*"]) + term = JointEffortAction(cfg, env) + + term.process_actions(actions_wp, action_offset=0) + raw_before = wp.to_torch(term.raw_actions).clone() + + # Reset only first half + mask_np = [i < NUM_ENVS // 2 for i in range(NUM_ENVS)] + mask = wp.array(mask_np, dtype=wp.bool, device=DEVICE) + term.reset(env_mask=mask) + + raw_after = wp.to_torch(term.raw_actions) + # First half zeroed + assert_close(raw_after[: NUM_ENVS // 2], torch.zeros(NUM_ENVS // 2, NUM_JOINTS, device=DEVICE)) + # Second half unchanged + assert_close(raw_after[NUM_ENVS // 2 :], raw_before[NUM_ENVS // 2 :]) + + def test_joint_action_with_scale(self, env, asset, actions_wp): + cfg = JointEffortActionCfg(asset_name="robot", joint_names=[".*"], scale=2.5) + term = JointEffortAction(cfg, env) + + term.process_actions(actions_wp, action_offset=0) + + raw = wp.to_torch(actions_wp) + expected = raw * 2.5 + assert_close(term.processed_actions, expected) + + +# ============================================================================ +# Mathematical parity tests: warp processed_actions == raw * scale + offset +# (This is the same formula used by the stable JointAction.process_actions.) +# ============================================================================ + + +class TestJointActionMathParity: + """Verify warp processed_actions match the affine formula raw * scale + offset. + + The stable ``JointAction.process_actions`` computes + ``processed = raw * scale + offset``. These tests verify the warp + implementation produces identical results for various scale/offset + configurations, confirming mathematical parity without needing to + instantiate the stable classes (which require a full env). + """ + + def test_effort_identity(self, env, actions_wp): + """scale=1, offset=0 -> processed == raw.""" + cfg = JointEffortActionCfg(asset_name="robot", joint_names=[".*"]) + term = JointEffortAction(cfg, env) + term.process_actions(actions_wp, action_offset=0) + + raw = wp.to_torch(actions_wp) + expected = raw * 1.0 + 0.0 + assert_close(term.processed_actions, expected) + + def test_effort_with_scale(self, env, actions_wp): + """scale=3.0, offset=0 -> processed == raw * 3.""" + cfg = JointEffortActionCfg(asset_name="robot", joint_names=[".*"], scale=3.0) + term = JointEffortAction(cfg, env) + term.process_actions(actions_wp, action_offset=0) + + raw = wp.to_torch(actions_wp) + expected = raw * 3.0 + assert_close(term.processed_actions, expected) + + def test_position_with_default_offset(self, env, art_data, actions_wp): + """use_default_offset=True -> processed == raw + defaults[0].""" + cfg = JointPositionActionCfg(asset_name="robot", joint_names=[".*"], use_default_offset=True) + term = JointPositionAction(cfg, env) + term.process_actions(actions_wp, action_offset=0) + + raw = wp.to_torch(actions_wp) + defaults = wp.to_torch(art_data.default_joint_pos)[0] + expected = raw * 1.0 + defaults.unsqueeze(0) + assert_close(term.processed_actions, expected) + + def test_position_scale_and_offset(self, env, art_data, actions_wp): + """scale=2, use_default_offset=True -> processed == raw * 2 + defaults[0].""" + cfg = JointPositionActionCfg(asset_name="robot", joint_names=[".*"], scale=2.0, use_default_offset=True) + term = JointPositionAction(cfg, env) + term.process_actions(actions_wp, action_offset=0) + + raw = wp.to_torch(actions_wp) + defaults = wp.to_torch(art_data.default_joint_pos)[0] + expected = raw * 2.0 + defaults.unsqueeze(0) + assert_close(term.processed_actions, expected) diff --git a/source/isaaclab_experimental/test/envs/mdp/test_events_warp_parity.py b/source/isaaclab_experimental/test/envs/mdp/test_events_warp_parity.py new file mode 100644 index 000000000000..d29f15f52d8b --- /dev/null +++ b/source/isaaclab_experimental/test/envs/mdp/test_events_warp_parity.py @@ -0,0 +1,340 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Parity tests for warp-first event MDP terms.""" + +from __future__ import annotations + +import numpy as np +import pytest +import torch +import warp as wp + +# Skip entire module if no CUDA device available +wp.init() +pytestmark = pytest.mark.skipif(not wp.is_cuda_available(), reason="CUDA device required") + +import isaaclab_experimental.envs.mdp.events as warp_evt +from parity_helpers import ( + DEVICE, + NUM_ACTIONS, + NUM_ENVS, + NUM_JOINTS, + MockActionManagerTorch, + MockActionManagerWarp, + MockArticulation, + MockArticulationData, + MockScene, + MockSceneEntityCfg, + assert_close, + copy_np_to_wp, +) + +# ============================================================================ +# Fixtures +# ============================================================================ + + +@pytest.fixture(autouse=True) +def _clear_function_caches(): + """Clear first-call caches on warp MDP functions so each test starts fresh. + + Functions that cache warp views via the ``hasattr`` pattern need clearing + between tests to avoid stale references from prior fixtures. + """ + yield + for fn in ( + warp_evt.push_by_setting_velocity, + warp_evt.apply_external_force_torque, + warp_evt.reset_root_state_uniform, + warp_evt.randomize_rigid_body_com, + ): + for attr in list(vars(fn)): + if attr.startswith("_"): + delattr(fn, attr) + + +@pytest.fixture() +def art_data(): + return MockArticulationData(NUM_ENVS, NUM_JOINTS, DEVICE) + + +@pytest.fixture() +def env_origins(): + rng = np.random.RandomState(77) + origins_np = rng.randn(NUM_ENVS, 3).astype(np.float32) + return wp.array(origins_np, dtype=wp.vec3f, device=DEVICE) + + +@pytest.fixture() +def scene(art_data, env_origins): + return MockScene({"robot": MockArticulation(art_data)}, env_origins) + + +@pytest.fixture() +def action_wp(): + rng = np.random.RandomState(99) + a = wp.array(rng.randn(NUM_ENVS, NUM_ACTIONS).astype(np.float32), device=DEVICE) + b = wp.array(rng.randn(NUM_ENVS, NUM_ACTIONS).astype(np.float32), device=DEVICE) + return a, b + + +@pytest.fixture() +def episode_length_buf(): + torch.manual_seed(55) + return torch.randint(0, 500, (NUM_ENVS,), dtype=torch.int64, device=DEVICE) + + +@pytest.fixture() +def warp_env(scene, action_wp, episode_length_buf): + """Env with warp action manager (for experimental functions).""" + + class _Env: + pass + + env = _Env() + env.scene = scene + env.action_manager = MockActionManagerWarp(action_wp[0], action_wp[1]) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length_s = 10.0 + # RNG state for events (seeded deterministically) + env.rng_state_wp = wp.array(np.arange(NUM_ENVS, dtype=np.uint32) + 42, device=DEVICE) + return env + + +@pytest.fixture() +def stable_env(scene, action_wp, episode_length_buf): + """Env with torch action manager (for stable functions).""" + + class _Env: + pass + + env = _Env() + env.scene = scene + env.action_manager = MockActionManagerTorch(action_wp[0], action_wp[1]) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length_s = 10.0 + return env + + +@pytest.fixture() +def all_joints_cfg(): + return MockSceneEntityCfg("robot", list(range(NUM_JOINTS)), NUM_JOINTS, DEVICE) + + +# ============================================================================ +# Event parity tests: deterministic (zero-width range) warp vs stable +# ============================================================================ + + +class TestEventParity: + """Verify warp event functions produce the same result as stable torch equivalents. + + Since warp and stable use different RNG implementations, parity is tested using + deterministic (zero-width) ranges where randomness has no effect. Both must + produce ``default + 0`` (offset) or ``default * 1`` (scale), clamped to limits. + """ + + def test_reset_joints_by_offset_parity(self, warp_env, stable_env, art_data, all_joints_cfg): + """Zero-offset: both warp and stable should produce clamped defaults.""" + cfg = all_joints_cfg + mask = wp.array([True] * NUM_ENVS, dtype=wp.bool, device=DEVICE) + + # Set known defaults + new_defaults = np.full((NUM_ENVS, NUM_JOINTS), 0.5, dtype=np.float32) + copy_np_to_wp(art_data.default_joint_pos, new_defaults) + + # Run warp version + warp_evt.reset_joints_by_offset( + warp_env, mask, position_range=(0.0, 0.0), velocity_range=(0.0, 0.0), asset_cfg=cfg + ) + wp.synchronize() + warp_pos = wp.to_torch(art_data.joint_pos).clone() + warp_vel = wp.to_torch(art_data.joint_vel).clone() + + # Run stable version (writes via write_joint_position_to_sim_index — which our mock + # does not implement, so we compute the expected result directly) + defaults_t = wp.to_torch(art_data.default_joint_pos).clone() + limits_t = wp.to_torch(art_data.soft_joint_pos_limits) + vel_limits_t = wp.to_torch(art_data.soft_joint_vel_limits) + expected_pos = defaults_t.clamp(limits_t[..., 0], limits_t[..., 1]) + expected_vel = wp.to_torch(art_data.default_joint_vel).clone().clamp(-vel_limits_t, vel_limits_t) + + assert_close(warp_pos, expected_pos) + assert_close(warp_vel, expected_vel) + + def test_reset_joints_by_scale_parity(self, warp_env, stable_env, art_data, all_joints_cfg): + """Scale=1.0: both warp and stable should produce clamped defaults.""" + cfg = all_joints_cfg + mask = wp.array([True] * NUM_ENVS, dtype=wp.bool, device=DEVICE) + + # Set known defaults + new_defaults = np.full((NUM_ENVS, NUM_JOINTS), 0.25, dtype=np.float32) + copy_np_to_wp(art_data.default_joint_pos, new_defaults) + + # Run warp version + warp_evt.reset_joints_by_scale( + warp_env, mask, position_range=(1.0, 1.0), velocity_range=(1.0, 1.0), asset_cfg=cfg + ) + wp.synchronize() + warp_pos = wp.to_torch(art_data.joint_pos).clone() + warp_vel = wp.to_torch(art_data.joint_vel).clone() + + # Expected: default * 1.0, clamped to limits + defaults_t = wp.to_torch(art_data.default_joint_pos).clone() + limits_t = wp.to_torch(art_data.soft_joint_pos_limits) + vel_limits_t = wp.to_torch(art_data.soft_joint_vel_limits) + expected_pos = defaults_t.clamp(limits_t[..., 0], limits_t[..., 1]) + expected_vel = wp.to_torch(art_data.default_joint_vel).clone().clamp(-vel_limits_t, vel_limits_t) + + assert_close(warp_pos, expected_pos) + assert_close(warp_vel, expected_vel) + + +# ============================================================================ +# Event capture-mutate-replay tests (from test_mdp_warp_parity.py) +# ============================================================================ + + +class TestEventCapturedDataMutation: + """Verify event functions are capture-safe and react to mutated input data.""" + + # -- reset_joints_by_offset ------------------------------------------------- + + def test_reset_joints_by_offset(self, warp_env, art_data, all_joints_cfg): + """With zero-width offset, result == defaults. Mutate defaults -> result tracks.""" + cfg = all_joints_cfg + mask = wp.array([True] * NUM_ENVS, dtype=wp.bool, device=DEVICE) + + # Warm-up + warp_evt.reset_joints_by_offset( + warp_env, mask, position_range=(0.0, 0.0), velocity_range=(0.0, 0.0), asset_cfg=cfg + ) + + # Capture + with wp.ScopedCapture() as cap: + warp_evt.reset_joints_by_offset( + warp_env, mask, position_range=(0.0, 0.0), velocity_range=(0.0, 0.0), asset_cfg=cfg + ) + + # Mutate defaults in-place + new_defaults = np.full((NUM_ENVS, NUM_JOINTS), 0.5, dtype=np.float32) + copy_np_to_wp(art_data.default_joint_pos, new_defaults) + + # Replay + wp.capture_launch(cap.graph) + wp.synchronize() + + # With zero offset, joint_pos should equal new defaults (clamped to limits [-3.14, 3.14]) + result = wp.to_torch(art_data.joint_pos) + expected = torch.full((NUM_ENVS, NUM_JOINTS), 0.5, device=DEVICE) + assert_close(result, expected) + + # -- reset_joints_by_scale -------------------------------------------------- + + def test_reset_joints_by_scale(self, warp_env, art_data, all_joints_cfg): + """With scale=1.0, result == defaults. Mutate defaults -> result tracks.""" + cfg = all_joints_cfg + mask = wp.array([True] * NUM_ENVS, dtype=wp.bool, device=DEVICE) + + warp_evt.reset_joints_by_scale( + warp_env, mask, position_range=(1.0, 1.0), velocity_range=(1.0, 1.0), asset_cfg=cfg + ) + with wp.ScopedCapture() as cap: + warp_evt.reset_joints_by_scale( + warp_env, mask, position_range=(1.0, 1.0), velocity_range=(1.0, 1.0), asset_cfg=cfg + ) + + new_defaults = np.full((NUM_ENVS, NUM_JOINTS), 0.25, dtype=np.float32) + copy_np_to_wp(art_data.default_joint_pos, new_defaults) + + wp.capture_launch(cap.graph) + wp.synchronize() + + result = wp.to_torch(art_data.joint_pos) + expected = torch.full((NUM_ENVS, NUM_JOINTS), 0.25, device=DEVICE) + assert_close(result, expected) + + # -- push_by_setting_velocity ----------------------------------------------- + + def test_push_by_setting_velocity(self, warp_env, art_data, all_joints_cfg): + """With zero-width velocity range, scratch == root_vel_w. Mutate root_vel_w -> scratch tracks.""" + mask = wp.array([True] * NUM_ENVS, dtype=wp.bool, device=DEVICE) + zero_range = { + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.0), + } + + warp_evt.push_by_setting_velocity(warp_env, mask, velocity_range=zero_range) + with wp.ScopedCapture() as cap: + warp_evt.push_by_setting_velocity(warp_env, mask, velocity_range=zero_range) + + # Mutate root_vel_w + new_vel = np.tile([1.0, 2.0, 3.0, 0.1, 0.2, 0.3], (NUM_ENVS, 1)).astype(np.float32) + copy_np_to_wp(art_data.root_vel_w, new_vel) + + wp.capture_launch(cap.graph) + wp.synchronize() + + scratch = wp.to_torch(warp_evt.push_by_setting_velocity._scratch_vel) + expected = torch.tensor([1.0, 2.0, 3.0, 0.1, 0.2, 0.3], device=DEVICE).expand(NUM_ENVS, -1) + assert_close(scratch, expected) + + # -- apply_external_force_torque -------------------------------------------- + + def test_apply_external_force_torque(self, warp_env, art_data, all_joints_cfg): + """With zero-width ranges, forces/torques are zero. Non-zero ranges produce non-zero output.""" + mask = wp.array([True] * NUM_ENVS, dtype=wp.bool, device=DEVICE) + + # Zero-range: forces and torques should be zero + warp_evt.apply_external_force_torque(warp_env, mask, force_range=(0.0, 0.0), torque_range=(0.0, 0.0)) + with wp.ScopedCapture() as cap: + warp_evt.apply_external_force_torque(warp_env, mask, force_range=(0.0, 0.0), torque_range=(0.0, 0.0)) + wp.capture_launch(cap.graph) + wp.synchronize() + + forces = wp.to_torch(warp_evt.apply_external_force_torque._scratch_forces) + torques = wp.to_torch(warp_evt.apply_external_force_torque._scratch_torques) + assert_close(forces, torch.zeros_like(forces)) + assert_close(torques, torch.zeros_like(torques)) + + # -- reset_root_state_uniform ----------------------------------------------- + + # -- env_mask selectivity --------------------------------------------------- + + def test_reset_joints_mask_selectivity(self, warp_env, art_data, all_joints_cfg): + """Only masked envs are modified; unmasked envs retain their state.""" + cfg = all_joints_cfg + # Mask: only first half of envs + mask_np = np.array([i < NUM_ENVS // 2 for i in range(NUM_ENVS)]) + mask = wp.array(mask_np, dtype=wp.bool, device=DEVICE) + + # Set joint_pos to a known value + sentinel = np.full((NUM_ENVS, NUM_JOINTS), 999.0, dtype=np.float32) + copy_np_to_wp(art_data.joint_pos, sentinel) + + # Set defaults to 0 + copy_np_to_wp(art_data.default_joint_pos, np.zeros((NUM_ENVS, NUM_JOINTS), dtype=np.float32)) + + warp_evt.reset_joints_by_offset( + warp_env, mask, position_range=(0.0, 0.0), velocity_range=(0.0, 0.0), asset_cfg=cfg + ) + wp.synchronize() + + result = wp.to_torch(art_data.joint_pos) + # Masked envs: reset to 0 (defaults + 0 offset) + assert_close(result[: NUM_ENVS // 2], torch.zeros(NUM_ENVS // 2, NUM_JOINTS, device=DEVICE)) + # Unmasked envs: still 999.0 + assert_close(result[NUM_ENVS // 2 :], torch.full((NUM_ENVS // 2, NUM_JOINTS), 999.0, device=DEVICE)) diff --git a/source/isaaclab_experimental/test/envs/mdp/test_observations_warp_parity.py b/source/isaaclab_experimental/test/envs/mdp/test_observations_warp_parity.py new file mode 100644 index 000000000000..9f1eea23bf8d --- /dev/null +++ b/source/isaaclab_experimental/test/envs/mdp/test_observations_warp_parity.py @@ -0,0 +1,458 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Parity tests for warp-first observation MDP terms.""" + +from __future__ import annotations + +import numpy as np +import pytest +import torch +import warp as wp + +# Skip entire module if no CUDA device available +wp.init() +pytestmark = pytest.mark.skipif(not wp.is_cuda_available(), reason="CUDA device required") + +import isaaclab_experimental.envs.mdp.observations as warp_obs +from parity_helpers import ( + CMD_DIM, + DEVICE, + NUM_ACTIONS, + NUM_BODIES, + NUM_ENVS, + NUM_JOINTS, + MockActionManagerTorch, + MockActionManagerWarp, + MockArticulation, + MockArticulationData, + MockCommandManager, + MockCommandTerm, + MockContactSensor, + MockContactSensorData, + MockScene, + MockSceneEntityCfg, + assert_close, + mutate_art_data, + run_warp_obs, + run_warp_obs_captured, +) + +import isaaclab.envs.mdp.observations as stable_obs + +# ============================================================================ +# Fixtures +# ============================================================================ + + +@pytest.fixture(autouse=True) +def _clear_caches(): + yield + for fn in [warp_obs.generated_commands]: + for attr in list(vars(fn)): + if attr.startswith("_"): + delattr(fn, attr) + + +@pytest.fixture() +def art_data(): + return MockArticulationData(NUM_ENVS, NUM_JOINTS, DEVICE) + + +@pytest.fixture() +def art_data_bodies(): + return MockArticulationData(num_bodies=NUM_BODIES) + + +@pytest.fixture() +def env_origins(): + rng = np.random.RandomState(77) + origins_np = rng.randn(NUM_ENVS, 3).astype(np.float32) + return wp.array(origins_np, dtype=wp.vec3f, device=DEVICE) + + +@pytest.fixture() +def contact_data(): + return MockContactSensorData() + + +@pytest.fixture() +def cmd_tensor(): + rng = np.random.RandomState(99) + return torch.tensor(rng.randn(NUM_ENVS, CMD_DIM).astype(np.float32), device=DEVICE) + + +@pytest.fixture() +def cmd_term(): + return MockCommandTerm() + + +@pytest.fixture() +def scene(art_data, env_origins): + return MockScene({"robot": MockArticulation(art_data)}, env_origins) + + +@pytest.fixture() +def scene_bodies(art_data_bodies, env_origins, contact_data): + art = MockArticulation(art_data_bodies, num_bodies=NUM_BODIES) + sensor = MockContactSensor(contact_data) + return MockScene({"robot": art}, env_origins, sensors={"contact_sensor": sensor}) + + +@pytest.fixture() +def action_wp(): + rng = np.random.RandomState(99) + a = wp.array(rng.randn(NUM_ENVS, NUM_ACTIONS).astype(np.float32), device=DEVICE) + b = wp.array(rng.randn(NUM_ENVS, NUM_ACTIONS).astype(np.float32), device=DEVICE) + return a, b # (action, prev_action) + + +@pytest.fixture() +def episode_length_buf(): + torch.manual_seed(55) + return torch.randint(0, 500, (NUM_ENVS,), dtype=torch.int64, device=DEVICE) + + +@pytest.fixture() +def warp_env(scene, action_wp, episode_length_buf): + """Env with warp action manager (for experimental functions).""" + + class _Env: + pass + + env = _Env() + env.scene = scene + env.action_manager = MockActionManagerWarp(action_wp[0], action_wp[1]) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length_s = 10.0 + env.rng_state_wp = wp.array(np.arange(NUM_ENVS, dtype=np.uint32) + 42, device=DEVICE) + return env + + +@pytest.fixture() +def stable_env(scene, action_wp, episode_length_buf): + """Env with torch action manager (for stable functions).""" + + class _Env: + pass + + env = _Env() + env.scene = scene + env.action_manager = MockActionManagerTorch(action_wp[0], action_wp[1]) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length_s = 10.0 + return env + + +@pytest.fixture() +def warp_env_bodies(scene_bodies, action_wp, episode_length_buf, cmd_tensor, cmd_term): + """Env with body-level data and command manager (for new-terms observation tests).""" + + class _Env: + pass + + env = _Env() + env.scene = scene_bodies + env.action_manager = MockActionManagerWarp(action_wp[0], action_wp[1]) + env.command_manager = MockCommandManager(cmd_tensor, cmd_term) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length = 500 + env.max_episode_length_s = 10.0 + env.rng_state_wp = wp.array(np.arange(NUM_ENVS, dtype=np.uint32) + 42, device=DEVICE) + return env + + +@pytest.fixture() +def stable_env_bodies(scene_bodies, action_wp, episode_length_buf, cmd_tensor, cmd_term): + """Env with body-level data and command manager (for stable new-terms observation tests).""" + + class _Env: + pass + + env = _Env() + env.scene = scene_bodies + env.action_manager = MockActionManagerWarp(action_wp[0], action_wp[1]) + env.command_manager = MockCommandManager(cmd_tensor, cmd_term) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length = 500 + env.max_episode_length_s = 10.0 + return env + + +@pytest.fixture() +def all_joints_cfg(): + return MockSceneEntityCfg("robot", list(range(NUM_JOINTS)), NUM_JOINTS, DEVICE) + + +@pytest.fixture() +def subset_cfg(): + return MockSceneEntityCfg("robot", [0, 2, 5, 8], NUM_JOINTS, DEVICE) + + +# ============================================================================ +# Observation parity tests (from test_mdp_warp_parity.py) +# ============================================================================ + + +class TestObservationParity: + """Verify experimental observation Warp kernels match stable torch implementations.""" + + # -- Root state observations ------------------------------------------------ + + def test_base_pos_z(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_obs.base_pos_z(stable_env, asset_cfg=cfg) + actual = run_warp_obs(warp_obs.base_pos_z, warp_env, (NUM_ENVS, 1), asset_cfg=cfg) + actual_cap = run_warp_obs_captured(warp_obs.base_pos_z, warp_env, (NUM_ENVS, 1), asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_base_lin_vel(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_obs.base_lin_vel(stable_env, asset_cfg=cfg) + actual = run_warp_obs(warp_obs.base_lin_vel, warp_env, (NUM_ENVS, 3), asset_cfg=cfg) + actual_cap = run_warp_obs_captured(warp_obs.base_lin_vel, warp_env, (NUM_ENVS, 3), asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_base_ang_vel(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_obs.base_ang_vel(stable_env, asset_cfg=cfg) + actual = run_warp_obs(warp_obs.base_ang_vel, warp_env, (NUM_ENVS, 3), asset_cfg=cfg) + actual_cap = run_warp_obs_captured(warp_obs.base_ang_vel, warp_env, (NUM_ENVS, 3), asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_projected_gravity(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_obs.projected_gravity(stable_env, asset_cfg=cfg) + actual = run_warp_obs(warp_obs.projected_gravity, warp_env, (NUM_ENVS, 3), asset_cfg=cfg) + actual_cap = run_warp_obs_captured(warp_obs.projected_gravity, warp_env, (NUM_ENVS, 3), asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + # -- Joint observations (all joints) ---------------------------------------- + + def test_joint_pos_all(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_obs.joint_pos(stable_env, asset_cfg=cfg) + actual = run_warp_obs(warp_obs.joint_pos, warp_env, (NUM_ENVS, NUM_JOINTS), asset_cfg=cfg) + actual_cap = run_warp_obs_captured(warp_obs.joint_pos, warp_env, (NUM_ENVS, NUM_JOINTS), asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_joint_vel_all(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_obs.joint_vel(stable_env, asset_cfg=cfg) + actual = run_warp_obs(warp_obs.joint_vel, warp_env, (NUM_ENVS, NUM_JOINTS), asset_cfg=cfg) + actual_cap = run_warp_obs_captured(warp_obs.joint_vel, warp_env, (NUM_ENVS, NUM_JOINTS), asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + # -- Joint observations (subset) ------------------------------------------- + + def test_joint_pos_subset(self, warp_env, stable_env, subset_cfg): + cfg = subset_cfg + n_selected = len(cfg.joint_ids) + expected = stable_obs.joint_pos(stable_env, asset_cfg=cfg) + actual = run_warp_obs(warp_obs.joint_pos, warp_env, (NUM_ENVS, n_selected), asset_cfg=cfg) + actual_cap = run_warp_obs_captured(warp_obs.joint_pos, warp_env, (NUM_ENVS, n_selected), asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_joint_vel_subset(self, warp_env, stable_env, subset_cfg): + cfg = subset_cfg + n_selected = len(cfg.joint_ids) + expected = stable_obs.joint_vel(stable_env, asset_cfg=cfg) + actual = run_warp_obs(warp_obs.joint_vel, warp_env, (NUM_ENVS, n_selected), asset_cfg=cfg) + actual_cap = run_warp_obs_captured(warp_obs.joint_vel, warp_env, (NUM_ENVS, n_selected), asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + # -- Normalized joint position ---------------------------------------------- + + def test_joint_pos_limit_normalized(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_obs.joint_pos_limit_normalized(stable_env, asset_cfg=cfg) + actual = run_warp_obs(warp_obs.joint_pos_limit_normalized, warp_env, (NUM_ENVS, NUM_JOINTS), asset_cfg=cfg) + actual_cap = run_warp_obs_captured( + warp_obs.joint_pos_limit_normalized, warp_env, (NUM_ENVS, NUM_JOINTS), asset_cfg=cfg + ) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + # -- Action observation ----------------------------------------------------- + + def test_last_action(self, warp_env, stable_env, action_wp): + # Stable last_action returns env.action_manager.action (torch tensor) + expected = stable_obs.last_action(stable_env) + actual = run_warp_obs(warp_obs.last_action, warp_env, (NUM_ENVS, NUM_ACTIONS)) + actual_cap = run_warp_obs_captured(warp_obs.last_action, warp_env, (NUM_ENVS, NUM_ACTIONS)) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + +# ============================================================================ +# Observation parity tests (from test_mdp_warp_parity_new_terms.py) +# ============================================================================ + + +class TestObservationParityNewTerms: + """Verify observation Warp kernels for newly migrated terms match stable torch implementations.""" + + def test_generated_commands(self, warp_env_bodies, stable_env_bodies): + expected = stable_obs.generated_commands(stable_env_bodies, command_name="vel") + actual = run_warp_obs(warp_obs.generated_commands, warp_env_bodies, (NUM_ENVS, CMD_DIM), command_name="vel") + actual_cap = run_warp_obs_captured( + warp_obs.generated_commands, warp_env_bodies, (NUM_ENVS, CMD_DIM), command_name="vel" + ) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + +# ============================================================================ +# Capture-then-mutate-then-replay observation tests (from test_mdp_warp_parity.py) +# ============================================================================ + + +def _mutate_art_data(art_data: MockArticulationData, warp_env, rng_seed: int = 200): + """Mutate every data array in-place so captured graphs see fresh values.""" + mutate_art_data(art_data, warp_env, rng_seed=rng_seed) + + +class TestCapturedDataMutationObservations: + """Capture a graph, mutate buffer data in-place, replay -- results must match stable on the *new* data. + + This verifies observation MDP functions are truly capture-safe. + """ + + def _capture_mutate_check_obs(self, warp_fn, stable_fn, warp_env, stable_env, art_data, shape, **kwargs): + out = wp.zeros(shape, dtype=wp.float32, device=DEVICE) + warp_fn(warp_env, out, **kwargs) # warm-up + with wp.ScopedCapture() as cap: + warp_fn(warp_env, out, **kwargs) + _mutate_art_data(art_data, warp_env) + wp.capture_launch(cap.graph) + assert_close(wp.to_torch(out).clone(), stable_fn(stable_env, **kwargs)) + + def test_base_pos_z(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_obs( + warp_obs.base_pos_z, + stable_obs.base_pos_z, + warp_env, + stable_env, + art_data, + (NUM_ENVS, 1), + asset_cfg=all_joints_cfg, + ) + + def test_base_lin_vel(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_obs( + warp_obs.base_lin_vel, + stable_obs.base_lin_vel, + warp_env, + stable_env, + art_data, + (NUM_ENVS, 3), + asset_cfg=all_joints_cfg, + ) + + def test_base_ang_vel(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_obs( + warp_obs.base_ang_vel, + stable_obs.base_ang_vel, + warp_env, + stable_env, + art_data, + (NUM_ENVS, 3), + asset_cfg=all_joints_cfg, + ) + + def test_projected_gravity(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_obs( + warp_obs.projected_gravity, + stable_obs.projected_gravity, + warp_env, + stable_env, + art_data, + (NUM_ENVS, 3), + asset_cfg=all_joints_cfg, + ) + + def test_joint_pos(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_obs( + warp_obs.joint_pos, + stable_obs.joint_pos, + warp_env, + stable_env, + art_data, + (NUM_ENVS, NUM_JOINTS), + asset_cfg=all_joints_cfg, + ) + + def test_joint_vel(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_obs( + warp_obs.joint_vel, + stable_obs.joint_vel, + warp_env, + stable_env, + art_data, + (NUM_ENVS, NUM_JOINTS), + asset_cfg=all_joints_cfg, + ) + + def test_joint_pos_limit_normalized(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_obs( + warp_obs.joint_pos_limit_normalized, + stable_obs.joint_pos_limit_normalized, + warp_env, + stable_env, + art_data, + (NUM_ENVS, NUM_JOINTS), + asset_cfg=all_joints_cfg, + ) + + def test_last_action(self, warp_env, stable_env, art_data): + self._capture_mutate_check_obs( + warp_obs.last_action, + stable_obs.last_action, + warp_env, + stable_env, + art_data, + (NUM_ENVS, NUM_ACTIONS), + ) + + +# ============================================================================ +# Capture-mutate-replay observation tests (from test_mdp_warp_parity_new_terms.py) +# ============================================================================ + + +class TestCapturedDataMutationObservationsNewTerms: + """Capture graph, mutate buffer data, replay -- verify new-terms observation results match stable.""" + + def test_generated_commands(self, warp_env_bodies, stable_env_bodies, art_data_bodies, cmd_tensor): + """Mutate command tensor, replay captured graph, verify new commands are read.""" + out = wp.zeros((NUM_ENVS, CMD_DIM), dtype=wp.float32, device=DEVICE) + warp_obs.generated_commands(warp_env_bodies, out, command_name="vel") + with wp.ScopedCapture() as cap: + warp_obs.generated_commands(warp_env_bodies, out, command_name="vel") + # Mutate the command tensor in-place (zero-copy view picks it up) + cmd_tensor[:] = torch.randn_like(cmd_tensor) + wp.capture_launch(cap.graph) + expected = stable_obs.generated_commands(stable_env_bodies, command_name="vel") + assert_close(wp.to_torch(out).clone(), expected) diff --git a/source/isaaclab_experimental/test/envs/mdp/test_rewards_warp_parity.py b/source/isaaclab_experimental/test/envs/mdp/test_rewards_warp_parity.py new file mode 100644 index 000000000000..69cabcd14a36 --- /dev/null +++ b/source/isaaclab_experimental/test/envs/mdp/test_rewards_warp_parity.py @@ -0,0 +1,567 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Parity tests for warp-first reward MDP terms.""" + +from __future__ import annotations + +import numpy as np +import pytest +import torch +import warp as wp + +# Skip entire module if no CUDA device available +wp.init() +pytestmark = pytest.mark.skipif(not wp.is_cuda_available(), reason="CUDA device required") + +import isaaclab_experimental.envs.mdp.rewards as warp_rew +from parity_helpers import ( + BODY_IDS, + CMD_DIM, + DEVICE, + NUM_ACTIONS, + NUM_BODIES, + NUM_ENVS, + NUM_JOINTS, + MockActionManagerTorch, + MockActionManagerWarp, + MockArticulation, + MockArticulationData, + MockBodyCfg, + MockCommandManager, + MockCommandTerm, + MockContactSensor, + MockContactSensorData, + MockScene, + MockSceneEntityCfg, + MockSensorCfg, + MockTerminationManager, + assert_close, + mutate_art_data, + mutate_body_data, + run_warp_rew, + run_warp_rew_captured, +) + +import isaaclab.envs.mdp.rewards as stable_rew + +# ============================================================================ +# Fixtures +# ============================================================================ + + +@pytest.fixture(autouse=True) +def _clear_caches(): + yield + for fn in [warp_rew.track_lin_vel_xy_exp, warp_rew.track_ang_vel_z_exp, warp_rew.undesired_contacts]: + for attr in list(vars(fn)): + if attr.startswith("_"): + delattr(fn, attr) + + +@pytest.fixture() +def art_data(): + return MockArticulationData(NUM_ENVS, NUM_JOINTS, DEVICE) + + +@pytest.fixture() +def art_data_bodies(): + return MockArticulationData(num_bodies=NUM_BODIES) + + +@pytest.fixture() +def env_origins(): + rng = np.random.RandomState(77) + origins_np = rng.randn(NUM_ENVS, 3).astype(np.float32) + return wp.array(origins_np, dtype=wp.vec3f, device=DEVICE) + + +@pytest.fixture() +def contact_data(): + return MockContactSensorData() + + +@pytest.fixture() +def cmd_tensor(): + rng = np.random.RandomState(99) + return torch.tensor(rng.randn(NUM_ENVS, CMD_DIM).astype(np.float32), device=DEVICE) + + +@pytest.fixture() +def cmd_term(): + return MockCommandTerm() + + +@pytest.fixture() +def scene(art_data, env_origins): + return MockScene({"robot": MockArticulation(art_data)}, env_origins) + + +@pytest.fixture() +def scene_bodies(art_data_bodies, env_origins, contact_data): + art = MockArticulation(art_data_bodies, num_bodies=NUM_BODIES) + sensor = MockContactSensor(contact_data) + return MockScene({"robot": art}, env_origins, sensors={"contact_sensor": sensor}) + + +@pytest.fixture() +def action_wp(): + rng = np.random.RandomState(99) + a = wp.array(rng.randn(NUM_ENVS, NUM_ACTIONS).astype(np.float32), device=DEVICE) + b = wp.array(rng.randn(NUM_ENVS, NUM_ACTIONS).astype(np.float32), device=DEVICE) + return a, b + + +@pytest.fixture() +def episode_length_buf(): + torch.manual_seed(55) + return torch.randint(0, 500, (NUM_ENVS,), dtype=torch.int64, device=DEVICE) + + +@pytest.fixture() +def term_mgr(): + return MockTerminationManager() + + +@pytest.fixture() +def warp_env(scene, action_wp, episode_length_buf, term_mgr): + """Env with warp action manager (for experimental functions).""" + + class _Env: + pass + + env = _Env() + env.scene = scene + env.action_manager = MockActionManagerWarp(action_wp[0], action_wp[1]) + env.termination_manager = term_mgr + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length_s = 10.0 + env.rng_state_wp = wp.array(np.arange(NUM_ENVS, dtype=np.uint32) + 42, device=DEVICE) + return env + + +@pytest.fixture() +def stable_env(scene, action_wp, episode_length_buf, term_mgr): + """Env with torch action manager (for stable functions).""" + + class _Env: + pass + + env = _Env() + env.scene = scene + env.action_manager = MockActionManagerTorch(action_wp[0], action_wp[1]) + env.termination_manager = term_mgr + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length_s = 10.0 + return env + + +@pytest.fixture() +def warp_env_bodies(scene_bodies, action_wp, episode_length_buf, cmd_tensor, cmd_term): + """Env with body-level data and command manager (for new-terms reward tests).""" + + class _Env: + pass + + env = _Env() + env.scene = scene_bodies + env.action_manager = MockActionManagerWarp(action_wp[0], action_wp[1]) + env.command_manager = MockCommandManager(cmd_tensor, cmd_term) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length = 500 + env.max_episode_length_s = 10.0 + env.rng_state_wp = wp.array(np.arange(NUM_ENVS, dtype=np.uint32) + 42, device=DEVICE) + return env + + +@pytest.fixture() +def stable_env_bodies(scene_bodies, action_wp, episode_length_buf, cmd_tensor, cmd_term): + """Env with body-level data and command manager (for stable new-terms reward tests).""" + + class _Env: + pass + + env = _Env() + env.scene = scene_bodies + env.action_manager = MockActionManagerWarp(action_wp[0], action_wp[1]) + env.command_manager = MockCommandManager(cmd_tensor, cmd_term) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length = 500 + env.max_episode_length_s = 10.0 + return env + + +@pytest.fixture() +def all_joints_cfg(): + return MockSceneEntityCfg("robot", list(range(NUM_JOINTS)), NUM_JOINTS, DEVICE) + + +@pytest.fixture() +def body_cfg(): + return MockBodyCfg("robot", BODY_IDS) + + +@pytest.fixture() +def sensor_cfg(): + return MockSensorCfg("contact_sensor", BODY_IDS) + + +# ============================================================================ +# Reward parity tests (from test_mdp_warp_parity.py) +# ============================================================================ + + +class TestRewardParity: + """Verify experimental reward Warp kernels match stable torch implementations.""" + + # -- General rewards -------------------------------------------------------- + + def test_is_alive(self, warp_env, stable_env, term_mgr): + # Set some envs as terminated so the reward is non-trivial + term_mgr.terminated[::2] = True + expected = stable_rew.is_alive(stable_env) + actual = run_warp_rew(warp_rew.is_alive, warp_env) + actual_cap = run_warp_rew_captured(warp_rew.is_alive, warp_env) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_is_terminated(self, warp_env, stable_env, term_mgr): + term_mgr.terminated[::3] = True + expected = stable_rew.is_terminated(stable_env) + actual = run_warp_rew(warp_rew.is_terminated, warp_env) + actual_cap = run_warp_rew_captured(warp_rew.is_terminated, warp_env) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + # -- Root penalties --------------------------------------------------------- + + def test_lin_vel_z_l2(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_rew.lin_vel_z_l2(stable_env, asset_cfg=cfg) + actual = run_warp_rew(warp_rew.lin_vel_z_l2, warp_env, asset_cfg=cfg) + actual_cap = run_warp_rew_captured(warp_rew.lin_vel_z_l2, warp_env, asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_ang_vel_xy_l2(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_rew.ang_vel_xy_l2(stable_env, asset_cfg=cfg) + actual = run_warp_rew(warp_rew.ang_vel_xy_l2, warp_env, asset_cfg=cfg) + actual_cap = run_warp_rew_captured(warp_rew.ang_vel_xy_l2, warp_env, asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_flat_orientation_l2(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_rew.flat_orientation_l2(stable_env, asset_cfg=cfg) + actual = run_warp_rew(warp_rew.flat_orientation_l2, warp_env, asset_cfg=cfg) + actual_cap = run_warp_rew_captured(warp_rew.flat_orientation_l2, warp_env, asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + # -- Joint L2 penalties (masked) -------------------------------------------- + + def test_joint_vel_l2(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_rew.joint_vel_l2(stable_env, asset_cfg=cfg) + actual = run_warp_rew(warp_rew.joint_vel_l2, warp_env, asset_cfg=cfg) + actual_cap = run_warp_rew_captured(warp_rew.joint_vel_l2, warp_env, asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_joint_acc_l2(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_rew.joint_acc_l2(stable_env, asset_cfg=cfg) + actual = run_warp_rew(warp_rew.joint_acc_l2, warp_env, asset_cfg=cfg) + actual_cap = run_warp_rew_captured(warp_rew.joint_acc_l2, warp_env, asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_joint_torques_l2(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_rew.joint_torques_l2(stable_env, asset_cfg=cfg) + actual = run_warp_rew(warp_rew.joint_torques_l2, warp_env, asset_cfg=cfg) + actual_cap = run_warp_rew_captured(warp_rew.joint_torques_l2, warp_env, asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + # -- Joint L1 penalties (masked) -------------------------------------------- + + def test_joint_vel_l1(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_rew.joint_vel_l1(stable_env, asset_cfg=cfg) + actual = run_warp_rew(warp_rew.joint_vel_l1, warp_env, asset_cfg=cfg) + actual_cap = run_warp_rew_captured(warp_rew.joint_vel_l1, warp_env, asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + # -- Action penalties ------------------------------------------------------- + + def test_action_l2(self, warp_env, stable_env): + expected = stable_rew.action_l2(stable_env) + actual = run_warp_rew(warp_rew.action_l2, warp_env) + actual_cap = run_warp_rew_captured(warp_rew.action_l2, warp_env) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_action_rate_l2(self, warp_env, stable_env): + expected = stable_rew.action_rate_l2(stable_env) + actual = run_warp_rew(warp_rew.action_rate_l2, warp_env) + actual_cap = run_warp_rew_captured(warp_rew.action_rate_l2, warp_env) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + # -- Limit penalties -------------------------------------------------------- + + def test_joint_pos_limits(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_rew.joint_pos_limits(stable_env, asset_cfg=cfg) + actual = run_warp_rew(warp_rew.joint_pos_limits, warp_env, asset_cfg=cfg) + actual_cap = run_warp_rew_captured(warp_rew.joint_pos_limits, warp_env, asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + # -- Additional penalties --------------------------------------------------- + + def test_joint_deviation_l1(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + expected = stable_rew.joint_deviation_l1(stable_env, asset_cfg=cfg) + actual = run_warp_rew(warp_rew.joint_deviation_l1, warp_env, asset_cfg=cfg) + actual_cap = run_warp_rew_captured(warp_rew.joint_deviation_l1, warp_env, asset_cfg=cfg) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + +# ============================================================================ +# New reward parity tests (from test_mdp_warp_parity_new_terms.py) +# ============================================================================ + + +class TestNewRewardParity: + """Verify newly migrated reward Warp kernels match stable torch implementations.""" + + def test_track_lin_vel_xy_exp(self, warp_env_bodies, stable_env_bodies, body_cfg): + cfg = MockBodyCfg("robot") + cfg.joint_ids = list(range(NUM_JOINTS)) # needed for stable + std = 0.25 + expected = stable_rew.track_lin_vel_xy_exp(stable_env_bodies, std=std, command_name="vel", asset_cfg=cfg) + actual = run_warp_rew( + warp_rew.track_lin_vel_xy_exp, warp_env_bodies, std=std, command_name="vel", asset_cfg=cfg + ) + actual_cap = run_warp_rew_captured( + warp_rew.track_lin_vel_xy_exp, warp_env_bodies, std=std, command_name="vel", asset_cfg=cfg + ) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_track_ang_vel_z_exp(self, warp_env_bodies, stable_env_bodies, body_cfg): + cfg = MockBodyCfg("robot") + cfg.joint_ids = list(range(NUM_JOINTS)) + std = 0.25 + expected = stable_rew.track_ang_vel_z_exp(stable_env_bodies, std=std, command_name="vel", asset_cfg=cfg) + actual = run_warp_rew(warp_rew.track_ang_vel_z_exp, warp_env_bodies, std=std, command_name="vel", asset_cfg=cfg) + actual_cap = run_warp_rew_captured( + warp_rew.track_ang_vel_z_exp, warp_env_bodies, std=std, command_name="vel", asset_cfg=cfg + ) + assert_close(actual, expected) + assert_close(actual_cap, expected) + + def test_undesired_contacts(self, warp_env_bodies, stable_env_bodies, sensor_cfg): + threshold = 1.0 + expected = stable_rew.undesired_contacts(stable_env_bodies, threshold=threshold, sensor_cfg=sensor_cfg) + actual = run_warp_rew(warp_rew.undesired_contacts, warp_env_bodies, threshold=threshold, sensor_cfg=sensor_cfg) + actual_cap = run_warp_rew_captured( + warp_rew.undesired_contacts, warp_env_bodies, threshold=threshold, sensor_cfg=sensor_cfg + ) + assert_close(actual, expected.float()) + assert_close(actual_cap, expected.float()) + + +# ============================================================================ +# Capture-then-mutate-then-replay reward tests (from test_mdp_warp_parity.py) +# ============================================================================ + + +def _mutate_art_data(art_data: MockArticulationData, warp_env, rng_seed: int = 200): + """Mutate every data array in-place so captured graphs see fresh values.""" + mutate_art_data(art_data, warp_env, rng_seed=rng_seed) + + +def _mutate_body_data(art_data: MockArticulationData, rng_seed=200): + """Mutate body-level and root-level data in-place so captured graphs see fresh values.""" + mutate_body_data(art_data, rng_seed=rng_seed) + + +class TestCapturedDataMutationRewards: + """Capture a graph, mutate buffer data in-place, replay -- results must match stable on the *new* data. + + This verifies reward MDP functions are truly capture-safe. + """ + + def _capture_mutate_check_rew(self, warp_fn, stable_fn, warp_env, stable_env, art_data, **kwargs): + out = wp.zeros((NUM_ENVS,), dtype=wp.float32, device=DEVICE) + warp_fn(warp_env, out, **kwargs) + with wp.ScopedCapture() as cap: + warp_fn(warp_env, out, **kwargs) + _mutate_art_data(art_data, warp_env) + wp.capture_launch(cap.graph) + assert_close(wp.to_torch(out).clone(), stable_fn(stable_env, **kwargs)) + + def test_lin_vel_z_l2(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_rew( + warp_rew.lin_vel_z_l2, + stable_rew.lin_vel_z_l2, + warp_env, + stable_env, + art_data, + asset_cfg=all_joints_cfg, + ) + + def test_ang_vel_xy_l2(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_rew( + warp_rew.ang_vel_xy_l2, + stable_rew.ang_vel_xy_l2, + warp_env, + stable_env, + art_data, + asset_cfg=all_joints_cfg, + ) + + def test_flat_orientation_l2(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_rew( + warp_rew.flat_orientation_l2, + stable_rew.flat_orientation_l2, + warp_env, + stable_env, + art_data, + asset_cfg=all_joints_cfg, + ) + + def test_joint_vel_l2(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_rew( + warp_rew.joint_vel_l2, + stable_rew.joint_vel_l2, + warp_env, + stable_env, + art_data, + asset_cfg=all_joints_cfg, + ) + + def test_joint_acc_l2(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_rew( + warp_rew.joint_acc_l2, + stable_rew.joint_acc_l2, + warp_env, + stable_env, + art_data, + asset_cfg=all_joints_cfg, + ) + + def test_joint_torques_l2(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_rew( + warp_rew.joint_torques_l2, + stable_rew.joint_torques_l2, + warp_env, + stable_env, + art_data, + asset_cfg=all_joints_cfg, + ) + + def test_action_l2(self, warp_env, stable_env, art_data): + self._capture_mutate_check_rew( + warp_rew.action_l2, + stable_rew.action_l2, + warp_env, + stable_env, + art_data, + ) + + def test_action_rate_l2(self, warp_env, stable_env, art_data): + self._capture_mutate_check_rew( + warp_rew.action_rate_l2, + stable_rew.action_rate_l2, + warp_env, + stable_env, + art_data, + ) + + def test_joint_pos_limits(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_rew( + warp_rew.joint_pos_limits, + stable_rew.joint_pos_limits, + warp_env, + stable_env, + art_data, + asset_cfg=all_joints_cfg, + ) + + def test_joint_deviation_l1(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_rew( + warp_rew.joint_deviation_l1, + stable_rew.joint_deviation_l1, + warp_env, + stable_env, + art_data, + asset_cfg=all_joints_cfg, + ) + + +# ============================================================================ +# Capture-mutate-replay reward tests for new terms (from test_mdp_warp_parity_new_terms.py) +# ============================================================================ + + +class TestCapturedDataMutationRewardsNewTerms: + """Capture graph, mutate buffer data, replay -- verify new-terms reward results match stable.""" + + def _capture_mutate_check_rew(self, warp_fn, stable_fn, warp_env, stable_env, art_data, **kwargs): + out = wp.zeros((NUM_ENVS,), dtype=wp.float32, device=DEVICE) + warp_fn(warp_env, out, **kwargs) + with wp.ScopedCapture() as cap: + warp_fn(warp_env, out, **kwargs) + _mutate_body_data(art_data) + wp.capture_launch(cap.graph) + expected = stable_fn(stable_env, **kwargs) + assert_close(wp.to_torch(out).clone(), expected) + + def test_track_lin_vel_xy_exp(self, warp_env_bodies, stable_env_bodies, art_data_bodies): + cfg = MockBodyCfg("robot") + cfg.joint_ids = list(range(NUM_JOINTS)) + self._capture_mutate_check_rew( + warp_rew.track_lin_vel_xy_exp, + stable_rew.track_lin_vel_xy_exp, + warp_env_bodies, + stable_env_bodies, + art_data_bodies, + std=0.25, + command_name="vel", + asset_cfg=cfg, + ) + + def test_track_ang_vel_z_exp(self, warp_env_bodies, stable_env_bodies, art_data_bodies): + cfg = MockBodyCfg("robot") + cfg.joint_ids = list(range(NUM_JOINTS)) + self._capture_mutate_check_rew( + warp_rew.track_ang_vel_z_exp, + stable_rew.track_ang_vel_z_exp, + warp_env_bodies, + stable_env_bodies, + art_data_bodies, + std=0.25, + command_name="vel", + asset_cfg=cfg, + ) diff --git a/source/isaaclab_experimental/test/envs/mdp/test_terminations_warp_parity.py b/source/isaaclab_experimental/test/envs/mdp/test_terminations_warp_parity.py new file mode 100644 index 000000000000..d82339e01228 --- /dev/null +++ b/source/isaaclab_experimental/test/envs/mdp/test_terminations_warp_parity.py @@ -0,0 +1,349 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Parity tests for warp-first termination MDP terms.""" + +from __future__ import annotations + +import numpy as np +import pytest +import torch +import warp as wp + +# Skip entire module if no CUDA device available +wp.init() +pytestmark = pytest.mark.skipif(not wp.is_cuda_available(), reason="CUDA device required") + +import isaaclab_experimental.envs.mdp.terminations as warp_term +from parity_helpers import ( + BODY_IDS, + CMD_DIM, + DEVICE, + NUM_ACTIONS, + NUM_BODIES, + NUM_ENVS, + NUM_JOINTS, + MockActionManagerTorch, + MockActionManagerWarp, + MockArticulation, + MockArticulationData, + MockCommandManager, + MockCommandTerm, + MockContactSensor, + MockContactSensorData, + MockScene, + MockSceneEntityCfg, + MockSensorCfg, + MockTerminationManager, + assert_equal, + mutate_art_data, + mutate_body_data, + run_warp_term, + run_warp_term_captured, +) + +import isaaclab.envs.mdp.terminations as stable_term + +# ============================================================================ +# Fixtures +# ============================================================================ + + +@pytest.fixture(autouse=True) +def _clear_caches(): + yield + for fn in [warp_term.illegal_contact]: + for attr in list(vars(fn)): + if attr.startswith("_"): + delattr(fn, attr) + + +@pytest.fixture() +def art_data(): + return MockArticulationData(NUM_ENVS, NUM_JOINTS, DEVICE) + + +@pytest.fixture() +def art_data_bodies(): + return MockArticulationData(num_bodies=NUM_BODIES) + + +@pytest.fixture() +def env_origins(): + rng = np.random.RandomState(77) + origins_np = rng.randn(NUM_ENVS, 3).astype(np.float32) + return wp.array(origins_np, dtype=wp.vec3f, device=DEVICE) + + +@pytest.fixture() +def contact_data(): + return MockContactSensorData() + + +@pytest.fixture() +def cmd_tensor(): + rng = np.random.RandomState(99) + return torch.tensor(rng.randn(NUM_ENVS, CMD_DIM).astype(np.float32), device=DEVICE) + + +@pytest.fixture() +def cmd_term(): + return MockCommandTerm() + + +@pytest.fixture() +def scene(art_data, env_origins): + return MockScene({"robot": MockArticulation(art_data)}, env_origins) + + +@pytest.fixture() +def scene_bodies(art_data_bodies, env_origins, contact_data): + art = MockArticulation(art_data_bodies, num_bodies=NUM_BODIES) + sensor = MockContactSensor(contact_data) + return MockScene({"robot": art}, env_origins, sensors={"contact_sensor": sensor}) + + +@pytest.fixture() +def action_wp(): + rng = np.random.RandomState(99) + a = wp.array(rng.randn(NUM_ENVS, NUM_ACTIONS).astype(np.float32), device=DEVICE) + b = wp.array(rng.randn(NUM_ENVS, NUM_ACTIONS).astype(np.float32), device=DEVICE) + return a, b + + +@pytest.fixture() +def episode_length_buf(): + torch.manual_seed(55) + return torch.randint(0, 500, (NUM_ENVS,), dtype=torch.int64, device=DEVICE) + + +@pytest.fixture() +def warp_env(scene, action_wp, episode_length_buf): + """Env with warp action manager (for experimental functions).""" + + class _Env: + pass + + env = _Env() + env.scene = scene + env.action_manager = MockActionManagerWarp(action_wp[0], action_wp[1]) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length_s = 10.0 + env.rng_state_wp = wp.array(np.arange(NUM_ENVS, dtype=np.uint32) + 42, device=DEVICE) + return env + + +@pytest.fixture() +def stable_env(scene, action_wp, episode_length_buf): + """Env with torch action manager (for stable functions).""" + + class _Env: + pass + + env = _Env() + env.scene = scene + env.action_manager = MockActionManagerTorch(action_wp[0], action_wp[1]) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env.step_dt = 0.02 + env.max_episode_length_s = 10.0 + return env + + +@pytest.fixture() +def warp_env_bodies(scene_bodies, action_wp, episode_length_buf, cmd_tensor, cmd_term): + """Env with body-level data and command manager (for new-terms termination tests).""" + + class _Env: + pass + + env = _Env() + env.scene = scene_bodies + env.action_manager = MockActionManagerWarp(action_wp[0], action_wp[1]) + env.command_manager = MockCommandManager(cmd_tensor, cmd_term) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env._episode_length_buf_wp = wp.from_torch(episode_length_buf) + env.step_dt = 0.02 + env.max_episode_length = 500 + env.max_episode_length_s = 10.0 + env.rng_state_wp = wp.array(np.arange(NUM_ENVS, dtype=np.uint32) + 42, device=DEVICE) + return env + + +@pytest.fixture() +def stable_env_bodies(scene_bodies, action_wp, episode_length_buf, cmd_tensor, cmd_term): + """Env with body-level data and command manager (for stable new-terms termination tests).""" + + class _Env: + pass + + env = _Env() + env.scene = scene_bodies + env.action_manager = MockActionManagerWarp(action_wp[0], action_wp[1]) + env.command_manager = MockCommandManager(cmd_tensor, cmd_term) + env.num_envs = NUM_ENVS + env.device = DEVICE + env.episode_length_buf = episode_length_buf + env._episode_length_buf_wp = wp.from_torch(episode_length_buf) + env.step_dt = 0.02 + env.max_episode_length = 500 + env.max_episode_length_s = 10.0 + # stable termination_manager needed for time_out + env.termination_manager = MockTerminationManager() + return env + + +@pytest.fixture() +def all_joints_cfg(): + return MockSceneEntityCfg("robot", list(range(NUM_JOINTS)), NUM_JOINTS, DEVICE) + + +@pytest.fixture() +def sensor_cfg(): + return MockSensorCfg("contact_sensor", BODY_IDS) + + +# ============================================================================ +# Termination parity tests (from test_mdp_warp_parity.py) +# ============================================================================ + + +class TestTerminationParity: + """Verify experimental termination Warp kernels match stable torch implementations.""" + + def test_root_height_below_minimum(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + min_h = 0.5 + expected = stable_term.root_height_below_minimum(stable_env, minimum_height=min_h, asset_cfg=cfg) + actual = run_warp_term(warp_term.root_height_below_minimum, warp_env, minimum_height=min_h, asset_cfg=cfg) + actual_cap = run_warp_term_captured( + warp_term.root_height_below_minimum, warp_env, minimum_height=min_h, asset_cfg=cfg + ) + assert_equal(actual, expected) + assert_equal(actual_cap, expected) + + def test_joint_pos_out_of_manual_limit(self, warp_env, stable_env, all_joints_cfg): + cfg = all_joints_cfg + bounds = (-1.0, 1.0) + expected = stable_term.joint_pos_out_of_manual_limit(stable_env, bounds=bounds, asset_cfg=cfg) + actual = run_warp_term(warp_term.joint_pos_out_of_manual_limit, warp_env, bounds=bounds, asset_cfg=cfg) + actual_cap = run_warp_term_captured( + warp_term.joint_pos_out_of_manual_limit, warp_env, bounds=bounds, asset_cfg=cfg + ) + assert_equal(actual, expected) + assert_equal(actual_cap, expected) + + +# ============================================================================ +# Termination parity tests (from test_mdp_warp_parity_new_terms.py) +# ============================================================================ + + +class TestTerminationParityNewTerms: + """Verify termination Warp kernels for newly migrated terms match stable torch implementations.""" + + def test_time_out(self, warp_env_bodies, stable_env_bodies): + expected = stable_term.time_out(stable_env_bodies) + actual = run_warp_term(warp_term.time_out, warp_env_bodies) + actual_cap = run_warp_term_captured(warp_term.time_out, warp_env_bodies) + assert_equal(actual, expected) + assert_equal(actual_cap, expected) + + def test_illegal_contact(self, warp_env_bodies, stable_env_bodies, sensor_cfg): + threshold = 1.0 + expected = stable_term.illegal_contact(stable_env_bodies, threshold=threshold, sensor_cfg=sensor_cfg) + actual = run_warp_term(warp_term.illegal_contact, warp_env_bodies, threshold=threshold, sensor_cfg=sensor_cfg) + actual_cap = run_warp_term_captured( + warp_term.illegal_contact, warp_env_bodies, threshold=threshold, sensor_cfg=sensor_cfg + ) + assert_equal(actual, expected) + assert_equal(actual_cap, expected) + + +# ============================================================================ +# Capture-then-mutate-then-replay termination tests (from test_mdp_warp_parity.py) +# ============================================================================ + + +def _mutate_art_data(art_data: MockArticulationData, warp_env, rng_seed: int = 200): + """Mutate every data array in-place so captured graphs see fresh values.""" + mutate_art_data(art_data, warp_env, rng_seed=rng_seed) + + +def _mutate_body_data(art_data: MockArticulationData, rng_seed=200): + """Mutate body-level and root-level data in-place so captured graphs see fresh values.""" + mutate_body_data(art_data, rng_seed=rng_seed) + + +class TestCapturedDataMutationTerminations: + """Capture a graph, mutate buffer data in-place, replay -- results must match stable on the *new* data. + + This verifies termination MDP functions are truly capture-safe. + """ + + def _capture_mutate_check_term(self, warp_fn, stable_fn, warp_env, stable_env, art_data, **kwargs): + out = wp.zeros((NUM_ENVS,), dtype=wp.bool, device=DEVICE) + warp_fn(warp_env, out, **kwargs) + with wp.ScopedCapture() as cap: + warp_fn(warp_env, out, **kwargs) + _mutate_art_data(art_data, warp_env) + wp.capture_launch(cap.graph) + assert_equal(wp.to_torch(out).clone(), stable_fn(stable_env, **kwargs)) + + def test_root_height_below_minimum(self, warp_env, stable_env, art_data, all_joints_cfg): + self._capture_mutate_check_term( + warp_term.root_height_below_minimum, + stable_term.root_height_below_minimum, + warp_env, + stable_env, + art_data, + minimum_height=0.5, + asset_cfg=all_joints_cfg, + ) + + def test_joint_pos_out_of_manual_limit(self, warp_env, stable_env, art_data, all_joints_cfg): + # joint_pos_out_of_manual_limit uses a 2D kernel that only writes True + # (never clears to False), so the output must be zeroed before each call. + # We include the zeroing inside the captured graph. + bounds = (-1.0, 1.0) + cfg = all_joints_cfg + out = wp.zeros((NUM_ENVS,), dtype=wp.bool, device=DEVICE) + # warm-up + out.zero_() + warp_term.joint_pos_out_of_manual_limit(warp_env, out, bounds=bounds, asset_cfg=cfg) + # capture (including the zero) + with wp.ScopedCapture() as cap: + out.zero_() + warp_term.joint_pos_out_of_manual_limit(warp_env, out, bounds=bounds, asset_cfg=cfg) + _mutate_art_data(art_data, warp_env) + wp.capture_launch(cap.graph) + expected = stable_term.joint_pos_out_of_manual_limit(stable_env, bounds=bounds, asset_cfg=cfg) + assert_equal(wp.to_torch(out).clone(), expected) + + +# ============================================================================ +# Capture-mutate-replay termination tests for new terms (from test_mdp_warp_parity_new_terms.py) +# ============================================================================ + + +class TestCapturedDataMutationTerminationsNewTerms: + """Capture graph, mutate buffer data, replay -- verify new-terms termination results match stable.""" + + def test_time_out(self, warp_env_bodies, stable_env_bodies, art_data_bodies): + out = wp.zeros((NUM_ENVS,), dtype=wp.bool, device=DEVICE) + warp_term.time_out(warp_env_bodies, out) + with wp.ScopedCapture() as cap: + warp_term.time_out(warp_env_bodies, out) + # Mutate episode length in-place + warp_env_bodies.episode_length_buf[:] = torch.randint(0, 600, (NUM_ENVS,), dtype=torch.int64, device=DEVICE) + wp.capture_launch(cap.graph) + expected = stable_term.time_out(stable_env_bodies) + assert_equal(wp.to_torch(out).clone(), expected) diff --git a/source/isaaclab_experimental/test/utils/test_manager_call_switch.py b/source/isaaclab_experimental/test/utils/test_manager_call_switch.py new file mode 100644 index 000000000000..b4745939eaa0 --- /dev/null +++ b/source/isaaclab_experimental/test/utils/test_manager_call_switch.py @@ -0,0 +1,475 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Tests for ManagerCallSwitch config loading, mode resolution, and dispatch.""" + +from __future__ import annotations + +import json +import os +import unittest + +import warp as wp +from isaaclab_experimental.utils.manager_call_switch import ManagerCallMode, ManagerCallSwitch + + +@wp.kernel +def _add_one(a: wp.array(dtype=wp.float32), b: wp.array(dtype=wp.float32)): + i = wp.tid() + b[i] = a[i] + 1.0 + + +class TestManagerCallMode(unittest.TestCase): + """Tests for the ManagerCallMode enum.""" + + def test_enum_values(self): + self.assertEqual(ManagerCallMode.STABLE, 0) + self.assertEqual(ManagerCallMode.WARP_NOT_CAPTURED, 1) + self.assertEqual(ManagerCallMode.WARP_CAPTURED, 2) + + def test_ordering(self): + self.assertLess(ManagerCallMode.STABLE, ManagerCallMode.WARP_NOT_CAPTURED) + self.assertLess(ManagerCallMode.WARP_NOT_CAPTURED, ManagerCallMode.WARP_CAPTURED) + + +# ====================================================================== +# Config loading +# ====================================================================== + + +class TestConfigLoading(unittest.TestCase): + """Tests for ManagerCallSwitch config parsing from dict, JSON, env var, and None.""" + + def test_none_uses_default(self): + switch = ManagerCallSwitch(cfg_source=None) + self.assertEqual(switch.get_mode_for_manager("RewardManager"), ManagerCallMode.WARP_CAPTURED) + + def test_dict_config(self): + switch = ManagerCallSwitch(cfg_source={"default": 1}) + self.assertEqual(switch.get_mode_for_manager("RewardManager"), ManagerCallMode.WARP_NOT_CAPTURED) + + def test_dict_per_manager_override(self): + switch = ManagerCallSwitch(cfg_source={"default": 2, "RewardManager": 0}) + self.assertEqual(switch.get_mode_for_manager("RewardManager"), ManagerCallMode.STABLE) + self.assertEqual(switch.get_mode_for_manager("ActionManager"), ManagerCallMode.WARP_CAPTURED) + + def test_dict_without_default_key(self): + """A dict missing 'default' should inherit from DEFAULT_CONFIG.""" + switch = ManagerCallSwitch(cfg_source={"RewardManager": 0}) + # default should be 2 (from DEFAULT_CONFIG) + self.assertEqual(switch.get_mode_for_manager("ActionManager"), ManagerCallMode.WARP_CAPTURED) + self.assertEqual(switch.get_mode_for_manager("RewardManager"), ManagerCallMode.STABLE) + + def test_json_string_config(self): + cfg_str = json.dumps({"default": 1, "ObservationManager": 0}) + switch = ManagerCallSwitch(cfg_source=cfg_str) + self.assertEqual(switch.get_mode_for_manager("ObservationManager"), ManagerCallMode.STABLE) + self.assertEqual(switch.get_mode_for_manager("RewardManager"), ManagerCallMode.WARP_NOT_CAPTURED) + + def test_empty_string_uses_default(self): + switch = ManagerCallSwitch(cfg_source="") + self.assertEqual(switch.get_mode_for_manager("RewardManager"), ManagerCallMode.WARP_CAPTURED) + + def test_env_var_fallback(self): + """When cfg_source is None, should read from MANAGER_CALL_CONFIG env var.""" + old = os.environ.get(ManagerCallSwitch.ENV_VAR) + try: + os.environ[ManagerCallSwitch.ENV_VAR] = json.dumps({"default": 0}) + switch = ManagerCallSwitch(cfg_source=None) + self.assertEqual(switch.get_mode_for_manager("RewardManager"), ManagerCallMode.STABLE) + finally: + if old is None: + os.environ.pop(ManagerCallSwitch.ENV_VAR, None) + else: + os.environ[ManagerCallSwitch.ENV_VAR] = old + + def test_invalid_json_raises(self): + with self.assertRaises(json.JSONDecodeError): + ManagerCallSwitch(cfg_source="not valid json") + + def test_invalid_mode_value_raises(self): + with self.assertRaises(ValueError): + ManagerCallSwitch(cfg_source={"default": 99}) + + def test_non_int_mode_raises(self): + with self.assertRaises(TypeError): + ManagerCallSwitch(cfg_source={"default": "fast"}) + + def test_invalid_cfg_type_raises(self): + with self.assertRaises(TypeError): + ManagerCallSwitch(cfg_source=42) + + +# ====================================================================== +# Mode resolution and capping +# ====================================================================== + + +class TestModeResolution(unittest.TestCase): + """Tests for mode resolution, MAX_MODE_OVERRIDES, and dynamic capturability.""" + + def test_max_mode_override_caps_default(self): + """Scene is capped to WARP_NOT_CAPTURED by MAX_MODE_OVERRIDES.""" + switch = ManagerCallSwitch(cfg_source={"default": 2}) + # Scene has a static cap of WARP_NOT_CAPTURED + self.assertEqual( + switch.get_mode_for_manager("Scene"), + ManagerCallMode.WARP_NOT_CAPTURED, + ) + # Other managers should not be capped + self.assertEqual( + switch.get_mode_for_manager("RewardManager"), + ManagerCallMode.WARP_CAPTURED, + ) + + def test_max_modes_kwarg(self): + """max_modes kwarg should cap managers beyond MAX_MODE_OVERRIDES.""" + switch = ManagerCallSwitch( + cfg_source={"default": 2}, + max_modes={"RewardManager": ManagerCallMode.WARP_NOT_CAPTURED}, + ) + self.assertEqual( + switch.get_mode_for_manager("RewardManager"), + ManagerCallMode.WARP_NOT_CAPTURED, + ) + + def test_register_manager_capturability_downgrades(self): + """register_manager_capturability(False) should cap a manager to WARP_NOT_CAPTURED.""" + switch = ManagerCallSwitch(cfg_source={"default": 2}) + self.assertEqual(switch.get_mode_for_manager("RewardManager"), ManagerCallMode.WARP_CAPTURED) + + switch.register_manager_capturability("RewardManager", capturable=False) + self.assertEqual( + switch.get_mode_for_manager("RewardManager"), + ManagerCallMode.WARP_NOT_CAPTURED, + ) + + def test_register_capturability_true_is_noop(self): + """register_manager_capturability(True) should not change anything.""" + switch = ManagerCallSwitch(cfg_source={"default": 2}) + switch.register_manager_capturability("RewardManager", capturable=True) + self.assertEqual(switch.get_mode_for_manager("RewardManager"), ManagerCallMode.WARP_CAPTURED) + + def test_register_capturability_does_not_upgrade(self): + """If a manager is already capped to NOT_CAPTURED, registering capturable=False again shouldn't change it.""" + switch = ManagerCallSwitch(cfg_source={"default": 2}) + switch.register_manager_capturability("RewardManager", capturable=False) + switch.register_manager_capturability("RewardManager", capturable=False) + self.assertEqual( + switch.get_mode_for_manager("RewardManager"), + ManagerCallMode.WARP_NOT_CAPTURED, + ) + + def test_capturability_interacts_with_static_cap(self): + """Dynamic capturability should respect existing static caps.""" + switch = ManagerCallSwitch( + cfg_source={"default": 2}, + max_modes={"RewardManager": ManagerCallMode.WARP_NOT_CAPTURED}, + ) + # Already capped, register_capturability(False) should be harmless + switch.register_manager_capturability("RewardManager", capturable=False) + self.assertEqual( + switch.get_mode_for_manager("RewardManager"), + ManagerCallMode.WARP_NOT_CAPTURED, + ) + + def test_mode_0_not_affected_by_cap(self): + """A manager explicitly set to STABLE (0) should stay at 0 even with cap at 1.""" + switch = ManagerCallSwitch(cfg_source={"default": 2, "RewardManager": 0}) + switch.register_manager_capturability("RewardManager", capturable=False) + self.assertEqual(switch.get_mode_for_manager("RewardManager"), ManagerCallMode.STABLE) + + +# ====================================================================== +# Stage dispatch +# ====================================================================== + + +class TestStageDispatch(unittest.TestCase): + """Tests for call_stage routing through stable / warp-eager / warp-captured paths.""" + + def test_stable_mode_calls_stable_fn(self): + switch = ManagerCallSwitch(cfg_source={"default": 0}) + called = {"stable": False, "warp": False} + + def stable_fn(): + called["stable"] = True + return "stable_result" + + def warp_fn(): + called["warp"] = True + return "warp_result" + + result = switch.call_stage( + stage="RewardManager_compute", + warp_call={"fn": warp_fn}, + stable_call={"fn": stable_fn}, + ) + self.assertTrue(called["stable"]) + self.assertFalse(called["warp"]) + self.assertEqual(result, "stable_result") + + def test_stable_mode_without_stable_call_raises(self): + switch = ManagerCallSwitch(cfg_source={"default": 0}) + with self.assertRaises(ValueError): + switch.call_stage( + stage="RewardManager_compute", + warp_call={"fn": lambda: None}, + stable_call=None, + ) + + def test_warp_eager_mode_calls_warp_fn(self): + switch = ManagerCallSwitch(cfg_source={"default": 1}) + called = {"stable": False, "warp": False} + + def stable_fn(): + called["stable"] = True + return "stable_result" + + def warp_fn(): + called["warp"] = True + return "warp_result" + + result = switch.call_stage( + stage="RewardManager_compute", + warp_call={"fn": warp_fn}, + stable_call={"fn": stable_fn}, + ) + self.assertFalse(called["stable"]) + self.assertTrue(called["warp"]) + self.assertEqual(result, "warp_result") + + def test_output_transform(self): + """The 'output' key in call spec should transform the return value.""" + switch = ManagerCallSwitch(cfg_source={"default": 1}) + + def warp_fn(): + return [1, 2, 3] + + result = switch.call_stage( + stage="RewardManager_compute", + warp_call={"fn": warp_fn, "output": len}, + ) + self.assertEqual(result, 3) + + def test_args_and_kwargs_forwarded(self): + """call_stage should forward args and kwargs from the call spec.""" + switch = ManagerCallSwitch(cfg_source={"default": 1}) + received = {} + + def warp_fn(a, b, key=None): + received["a"] = a + received["b"] = b + received["key"] = key + + switch.call_stage( + stage="RewardManager_compute", + warp_call={"fn": warp_fn, "args": (1, 2), "kwargs": {"key": "val"}}, + ) + self.assertEqual(received, {"a": 1, "b": 2, "key": "val"}) + + +class TestStageDispatchCaptured(unittest.TestCase): + """Tests for WARP_CAPTURED mode (requires GPU).""" + + def setUp(self): + self.device = "cuda:0" + + def test_captured_mode_produces_correct_output(self): + """WARP_CAPTURED should capture and replay a warp kernel correctly.""" + switch = ManagerCallSwitch(cfg_source={"default": 2}) + src = wp.full(4, value=5.0, dtype=wp.float32, device=self.device) + dst = wp.zeros(4, dtype=wp.float32, device=self.device) + + def warp_fn(): + wp.launch(_add_one, dim=4, inputs=[src, dst], device=self.device) + return dst + + # First call: warm-up + capture + result = switch.call_stage( + stage="RewardManager_compute", + warp_call={"fn": warp_fn}, + ) + self.assertAlmostEqual(result.numpy()[0], 6.0, places=5) + + # Replay + wp.copy(src, wp.full(4, value=10.0, dtype=wp.float32, device=self.device)) + result2 = switch.call_stage( + stage="RewardManager_compute", + warp_call={"fn": warp_fn}, + ) + self.assertIs(result, result2, "Replay must return same reference") + self.assertAlmostEqual(result2.numpy()[0], 11.0, places=5) + + def test_captured_warmup_call_count(self): + """WARP_CAPTURED first call should invoke fn exactly 2 times (warm-up + capture).""" + switch = ManagerCallSwitch(cfg_source={"default": 2}) + src = wp.zeros(4, dtype=wp.float32, device=self.device) + dst = wp.zeros(4, dtype=wp.float32, device=self.device) + call_count = [0] + + def warp_fn(): + call_count[0] += 1 + wp.launch(_add_one, dim=4, inputs=[src, dst], device=self.device) + return dst + + # First call_stage: warm-up (1) + capture (2) + switch.call_stage(stage="RewardManager_compute", warp_call={"fn": warp_fn}) + self.assertEqual(call_count[0], 2, "First captured call should invoke fn twice (warm-up + capture)") + + # Second call_stage: replay only — no new fn invocation + switch.call_stage(stage="RewardManager_compute", warp_call={"fn": warp_fn}) + self.assertEqual(call_count[0], 2, "Replay should not invoke fn again") + + # Third call_stage: still replay + switch.call_stage(stage="RewardManager_compute", warp_call={"fn": warp_fn}) + self.assertEqual(call_count[0], 2) + + def test_captured_warmup_handles_hasattr_guard(self): + """Warm-up should flush first-call allocations so capture doesn't record them. + + This simulates the real-world pattern where MDP terms allocate scratch + buffers on first call using hasattr guards. + """ + switch = ManagerCallSwitch(cfg_source={"default": 2}) + src = wp.ones(8, dtype=wp.float32, device=self.device) + holder = {} + + def fn_with_guard(): + if "buf" not in holder: + # First-call allocation — must happen during warm-up, not capture + holder["buf"] = wp.zeros(8, dtype=wp.float32, device=self.device) + wp.launch(_add_one, dim=8, inputs=[src, holder["buf"]], device=self.device) + return holder["buf"] + + # Should not raise — warm-up handles the allocation outside capture context + result = switch.call_stage( + stage="RewardManager_compute", + warp_call={"fn": fn_with_guard}, + ) + result_np = result.numpy() + for val in result_np: + self.assertAlmostEqual(val, 2.0, places=5) + + # Replay should also work (allocation already done, only kernel replays) + wp.copy(src, wp.full(8, value=5.0, dtype=wp.float32, device=self.device)) + result2 = switch.call_stage( + stage="RewardManager_compute", + warp_call={"fn": fn_with_guard}, + ) + for val in result2.numpy(): + self.assertAlmostEqual(val, 6.0, places=5) + + def test_warp_eager_no_warmup(self): + """WARP_NOT_CAPTURED mode should call fn exactly once per call (no warm-up).""" + switch = ManagerCallSwitch(cfg_source={"default": 1}) + call_count = [0] + + def warp_fn(): + call_count[0] += 1 + + switch.call_stage(stage="RewardManager_compute", warp_call={"fn": warp_fn}) + self.assertEqual(call_count[0], 1, "Eager mode should call fn exactly once") + + switch.call_stage(stage="RewardManager_compute", warp_call={"fn": warp_fn}) + self.assertEqual(call_count[0], 2, "Eager mode should call fn again each time") + + +# ====================================================================== +# Graph invalidation +# ====================================================================== + + +class TestGraphInvalidation(unittest.TestCase): + """Tests for invalidate_graphs on ManagerCallSwitch.""" + + def setUp(self): + self.device = "cuda:0" + + def test_invalidate_forces_recapture(self): + switch = ManagerCallSwitch(cfg_source={"default": 2}) + src = wp.full(4, value=1.0, dtype=wp.float32, device=self.device) + dst = wp.zeros(4, dtype=wp.float32, device=self.device) + + call_count = [0] + + def warp_fn(): + call_count[0] += 1 + wp.launch(_add_one, dim=4, inputs=[src, dst], device=self.device) + return dst + + # First capture + switch.call_stage(stage="RewardManager_compute", warp_call={"fn": warp_fn}) + self.assertEqual(call_count[0], 2) # warm-up + capture + + # Replay — no new calls + switch.call_stage(stage="RewardManager_compute", warp_call={"fn": warp_fn}) + self.assertEqual(call_count[0], 2) + + # Invalidate + switch.invalidate_graphs() + + # Should re-warm-up + re-capture + switch.call_stage(stage="RewardManager_compute", warp_call={"fn": warp_fn}) + self.assertEqual(call_count[0], 4) + + +# ====================================================================== +# resolve_manager_class +# ====================================================================== + + +class TestResolveManagerClass(unittest.TestCase): + """Tests for resolve_manager_class.""" + + def test_stable_resolves_to_isaaclab_managers(self): + switch = ManagerCallSwitch(cfg_source={"default": 0}) + cls = switch.resolve_manager_class("RewardManager") + self.assertTrue(cls.__module__.startswith("isaaclab.managers")) + + def test_warp_resolves_to_experimental_managers(self): + switch = ManagerCallSwitch(cfg_source={"default": 1}) + cls = switch.resolve_manager_class("RewardManager") + self.assertTrue(cls.__module__.startswith("isaaclab_experimental")) + + def test_mode_override(self): + """mode_override should bypass the config for class resolution.""" + switch = ManagerCallSwitch(cfg_source={"default": 2}) + cls = switch.resolve_manager_class("RewardManager", mode_override=ManagerCallMode.STABLE) + self.assertTrue(cls.__module__.startswith("isaaclab.managers")) + + def test_invalid_manager_raises(self): + switch = ManagerCallSwitch(cfg_source={"default": 0}) + with self.assertRaises(AttributeError): + switch.resolve_manager_class("NonexistentManager") + + +# ====================================================================== +# Manager name parsing +# ====================================================================== + + +class TestManagerNameParsing(unittest.TestCase): + """Tests for stage name → manager name extraction.""" + + def test_valid_stage_name(self): + switch = ManagerCallSwitch(cfg_source={"default": 1}) + # Dispatch a stage with valid name format + called = [False] + + def fn(): + called[0] = True + + switch.call_stage(stage="RewardManager_compute", warp_call={"fn": fn}) + self.assertTrue(called[0]) + + def test_invalid_stage_name_raises(self): + switch = ManagerCallSwitch(cfg_source={"default": 1}) + with self.assertRaises(ValueError): + switch.call_stage(stage="nounderscore", warp_call={"fn": lambda: None}) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/isaaclab_experimental/test/utils/test_warp_graph_cache.py b/source/isaaclab_experimental/test/utils/test_warp_graph_cache.py new file mode 100644 index 000000000000..1b9c10a76895 --- /dev/null +++ b/source/isaaclab_experimental/test/utils/test_warp_graph_cache.py @@ -0,0 +1,249 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Tests for WarpGraphCache capture/replay and warm-up behavior.""" + +from __future__ import annotations + +import unittest + +import warp as wp +from isaaclab_experimental.utils.warp_graph_cache import WarpGraphCache + + +@wp.kernel +def _add_one(a: wp.array(dtype=wp.float32), b: wp.array(dtype=wp.float32)): + """Simple warp kernel: b[i] = a[i] + 1.""" + i = wp.tid() + b[i] = a[i] + 1.0 + + +class TestWarpGraphCache(unittest.TestCase): + """Tests for :class:`WarpGraphCache`.""" + + def setUp(self): + self.device = "cuda:0" + self.cache = WarpGraphCache() + + # ------------------------------------------------------------------ + # Warm-up + # ------------------------------------------------------------------ + + def test_warmup_runs_before_capture(self): + """The function should be called eagerly (warm-up) before graph capture. + + We verify this by counting total invocations on the first call. + Warm-up = 1, capture = 1, so fn should be called exactly 2 times. + """ + call_count = [0] + src = wp.zeros(4, dtype=wp.float32, device=self.device) + dst = wp.zeros(4, dtype=wp.float32, device=self.device) + + def counted_launch(): + call_count[0] += 1 + wp.launch(_add_one, dim=4, inputs=[src, dst], device=self.device) + return dst + + # First call: warm-up + capture + self.cache.capture_or_replay("stage_a", counted_launch) + self.assertEqual(call_count[0], 2, "First call should invoke fn twice (warm-up + capture)") + + # Second call: replay only + self.cache.capture_or_replay("stage_a", counted_launch) + self.assertEqual(call_count[0], 2, "Replay should NOT invoke fn again") + + def test_warmup_flushes_first_call_allocations(self): + """Warm-up should handle first-call allocations so capture is clean. + + Simulates a hasattr guard pattern: allocate a buffer on first call only. + Without warm-up, the allocation would be recorded in the graph. + """ + holder = {} + src = wp.ones(8, dtype=wp.float32, device=self.device) + + def fn_with_hasattr_guard(): + if "buf" not in holder: + holder["buf"] = wp.zeros(8, dtype=wp.float32, device=self.device) + wp.launch(_add_one, dim=8, inputs=[src, holder["buf"]], device=self.device) + return holder["buf"] + + # Should not raise — warm-up handles the allocation outside capture + result = self.cache.capture_or_replay("guarded", fn_with_hasattr_guard) + self.assertIsNotNone(result) + + # Verify the kernel produced correct output + result_np = result.numpy() + for val in result_np: + self.assertAlmostEqual(val, 2.0, places=5) + + # ------------------------------------------------------------------ + # Capture / replay correctness + # ------------------------------------------------------------------ + + def test_capture_produces_correct_output(self): + """After capture, replaying the graph should produce correct results.""" + src = wp.full(4, value=3.0, dtype=wp.float32, device=self.device) + dst = wp.zeros(4, dtype=wp.float32, device=self.device) + + def my_fn(): + wp.launch(_add_one, dim=4, inputs=[src, dst], device=self.device) + return dst + + result = self.cache.capture_or_replay("compute", my_fn) + result_np = result.numpy() + for val in result_np: + self.assertAlmostEqual(val, 4.0, places=5) + + def test_replay_uses_updated_input(self): + """Replay should re-read from the same input buffer (pointer-stable). + + CUDA graph replay re-executes the same kernel on the same memory + addresses. If we update the input buffer in-place, the output + should reflect the new values. + """ + src = wp.full(4, value=1.0, dtype=wp.float32, device=self.device) + dst = wp.zeros(4, dtype=wp.float32, device=self.device) + + def my_fn(): + wp.launch(_add_one, dim=4, inputs=[src, dst], device=self.device) + return dst + + # Capture + self.cache.capture_or_replay("replay_test", my_fn) + + # Update input in-place + wp.copy(src, wp.full(4, value=10.0, dtype=wp.float32, device=self.device)) + + # Replay — should see updated input + result = self.cache.capture_or_replay("replay_test", my_fn) + result_np = result.numpy() + for val in result_np: + self.assertAlmostEqual(val, 11.0, places=5) + + def test_cached_result_is_same_reference(self): + """Replay should return the exact same object reference as capture.""" + src = wp.zeros(4, dtype=wp.float32, device=self.device) + dst = wp.zeros(4, dtype=wp.float32, device=self.device) + + def my_fn(): + wp.launch(_add_one, dim=4, inputs=[src, dst], device=self.device) + return dst + + result1 = self.cache.capture_or_replay("ref_test", my_fn) + result2 = self.cache.capture_or_replay("ref_test", my_fn) + self.assertIs(result1, result2, "Replay must return the same object reference") + + def test_multiple_stages_independent(self): + """Different stages should be captured and replayed independently.""" + src_a = wp.full(4, value=1.0, dtype=wp.float32, device=self.device) + dst_a = wp.zeros(4, dtype=wp.float32, device=self.device) + src_b = wp.full(4, value=5.0, dtype=wp.float32, device=self.device) + dst_b = wp.zeros(4, dtype=wp.float32, device=self.device) + + def fn_a(): + wp.launch(_add_one, dim=4, inputs=[src_a, dst_a], device=self.device) + return dst_a + + def fn_b(): + wp.launch(_add_one, dim=4, inputs=[src_b, dst_b], device=self.device) + return dst_b + + result_a = self.cache.capture_or_replay("stage_a", fn_a) + result_b = self.cache.capture_or_replay("stage_b", fn_b) + + self.assertAlmostEqual(result_a.numpy()[0], 2.0, places=5) + self.assertAlmostEqual(result_b.numpy()[0], 6.0, places=5) + + # Replay both + result_a2 = self.cache.capture_or_replay("stage_a", fn_a) + result_b2 = self.cache.capture_or_replay("stage_b", fn_b) + self.assertIs(result_a, result_a2) + self.assertIs(result_b, result_b2) + + # ------------------------------------------------------------------ + # Invalidation + # ------------------------------------------------------------------ + + def test_invalidate_all(self): + """invalidate() with no args should drop all cached graphs.""" + src = wp.zeros(4, dtype=wp.float32, device=self.device) + dst = wp.zeros(4, dtype=wp.float32, device=self.device) + + call_count = [0] + + def my_fn(): + call_count[0] += 1 + wp.launch(_add_one, dim=4, inputs=[src, dst], device=self.device) + return dst + + self.cache.capture_or_replay("s1", my_fn) + self.assertEqual(call_count[0], 2) # warm-up + capture + + self.cache.invalidate() + + # After invalidation, next call should re-warm-up and re-capture + self.cache.capture_or_replay("s1", my_fn) + self.assertEqual(call_count[0], 4) # 2 more (warm-up + capture) + + def test_invalidate_single_stage(self): + """invalidate(stage) should only drop the named stage.""" + src = wp.zeros(4, dtype=wp.float32, device=self.device) + dst_a = wp.zeros(4, dtype=wp.float32, device=self.device) + dst_b = wp.zeros(4, dtype=wp.float32, device=self.device) + + count_a = [0] + count_b = [0] + + def fn_a(): + count_a[0] += 1 + wp.launch(_add_one, dim=4, inputs=[src, dst_a], device=self.device) + return dst_a + + def fn_b(): + count_b[0] += 1 + wp.launch(_add_one, dim=4, inputs=[src, dst_b], device=self.device) + return dst_b + + self.cache.capture_or_replay("a", fn_a) + self.cache.capture_or_replay("b", fn_b) + self.assertEqual(count_a[0], 2) + self.assertEqual(count_b[0], 2) + + # Invalidate only "a" + self.cache.invalidate("a") + + self.cache.capture_or_replay("a", fn_a) + self.cache.capture_or_replay("b", fn_b) + self.assertEqual(count_a[0], 4, "Stage 'a' should re-capture after invalidation") + self.assertEqual(count_b[0], 2, "Stage 'b' should replay (not re-capture)") + + def test_invalidate_nonexistent_stage_is_noop(self): + """Invalidating a stage that was never captured should not raise.""" + self.cache.invalidate("nonexistent") # should not raise + + # ------------------------------------------------------------------ + # Args / kwargs forwarding + # ------------------------------------------------------------------ + + def test_args_and_kwargs_forwarded(self): + """capture_or_replay should forward args and kwargs to fn.""" + src = wp.full(4, value=2.0, dtype=wp.float32, device=self.device) + dst = wp.zeros(4, dtype=wp.float32, device=self.device) + + def my_fn(a, b, device="cuda:0"): + wp.launch(_add_one, dim=4, inputs=[a, b], device=device) + return b + + result = self.cache.capture_or_replay( + "args_test", + my_fn, + args=(src, dst), + kwargs={"device": self.device}, + ) + self.assertAlmostEqual(result.numpy()[0], 3.0, places=5) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/isaaclab_newton/isaaclab_newton/assets/articulation/articulation_data.py b/source/isaaclab_newton/isaaclab_newton/assets/articulation/articulation_data.py index 0c4cdc68a353..e78296c32864 100644 --- a/source/isaaclab_newton/isaaclab_newton/assets/articulation/articulation_data.py +++ b/source/isaaclab_newton/isaaclab_newton/assets/articulation/articulation_data.py @@ -15,6 +15,7 @@ from isaaclab.assets.articulation.base_articulation_data import BaseArticulationData from isaaclab.utils.buffers import TimestampedBufferWarp as TimestampedBuffer from isaaclab.utils.math import normalize +from isaaclab.utils.warp.utils import capture_unsafe from isaaclab_newton.assets import kernels as shared_kernels from isaaclab_newton.assets.articulation import kernels as articulation_kernels @@ -26,6 +27,13 @@ # import logger logger = logging.getLogger(__name__) +_LAZY_CAPTURE_REASON = ( + "This is a lazily-computed derived property guarded by a Python timestamp check " + "that is invisible during graph replay. Use Tier 1 base data (root_link_pose_w, " + "root_com_vel_w, body_link_pose_w, body_com_vel_w, joint_pos, joint_vel) and " + "inline the computation in your warp kernel. See GRAPH_CAPTURE_MIGRATION.md." +) + class ArticulationData(BaseArticulationData): """Data container for an articulation. @@ -553,6 +561,7 @@ def root_link_pose_w(self) -> wp.array: return self._sim_bind_root_link_pose_w @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def root_link_vel_w(self) -> wp.array: """Root link velocity ``[lin_vel, ang_vel]`` in simulation world frame. @@ -580,6 +589,7 @@ def root_link_vel_w(self) -> wp.array: return self._root_link_vel_w.data @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def root_com_pose_w(self) -> wp.array: """Root center of mass pose ``[pos, quat]`` in simulation world frame. @@ -654,6 +664,7 @@ def body_link_pose_w(self) -> wp.array: return self._sim_bind_body_link_pose_w @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def body_link_vel_w(self) -> wp.array: """Body link velocity ``[lin_vel, ang_vel]`` in simulation world frame. @@ -682,6 +693,7 @@ def body_link_vel_w(self) -> wp.array: return self._body_link_vel_w.data @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def body_com_pose_w(self) -> wp.array: """Body center of mass pose ``[pos, quat]`` in simulation world frame. @@ -860,6 +872,7 @@ def joint_acc(self) -> wp.array: """ @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def projected_gravity_b(self): """Projection of the gravity direction on base frame. @@ -877,6 +890,7 @@ def projected_gravity_b(self): return self._projected_gravity_b.data @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def heading_w(self): """Yaw heading of the base frame (in radians). @@ -898,6 +912,7 @@ def heading_w(self): return self._heading_w.data @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def root_link_lin_vel_b(self) -> wp.array: """Root link linear velocity in base frame. diff --git a/source/isaaclab_newton/isaaclab_newton/assets/rigid_object/rigid_object_data.py b/source/isaaclab_newton/isaaclab_newton/assets/rigid_object/rigid_object_data.py index 2af92df4592c..514d4e836d34 100644 --- a/source/isaaclab_newton/isaaclab_newton/assets/rigid_object/rigid_object_data.py +++ b/source/isaaclab_newton/isaaclab_newton/assets/rigid_object/rigid_object_data.py @@ -15,6 +15,7 @@ from isaaclab.assets.rigid_object.base_rigid_object_data import BaseRigidObjectData from isaaclab.utils.buffers import TimestampedBufferWarp as TimestampedBuffer from isaaclab.utils.math import normalize +from isaaclab.utils.warp.utils import capture_unsafe from isaaclab_newton.assets import kernels as shared_kernels from isaaclab_newton.physics import NewtonManager as SimulationManager @@ -26,6 +27,13 @@ # import logger logger = logging.getLogger(__name__) +_LAZY_CAPTURE_REASON = ( + "This is a lazily-computed derived property guarded by a Python timestamp check " + "that is invisible during graph replay. Use Tier 1 base data (root_link_pose_w, " + "root_com_vel_w, body_link_pose_w, body_com_vel_w) and inline the computation " + "in your warp kernel. See GRAPH_CAPTURE_MIGRATION.md." +) + class RigidObjectData(BaseRigidObjectData): """Data container for a rigid object. @@ -188,6 +196,7 @@ def root_link_pose_w(self) -> wp.array: return self._sim_bind_root_link_pose_w @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def root_link_vel_w(self) -> wp.array: """Root link velocity ``[lin_vel, ang_vel]`` in simulation world frame. @@ -215,6 +224,7 @@ def root_link_vel_w(self) -> wp.array: return self._root_link_vel_w.data @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def root_com_pose_w(self) -> wp.array: """Root center of mass pose ``[pos, quat]`` in simulation world frame. @@ -283,6 +293,7 @@ def body_link_pose_w(self) -> wp.array: return self._sim_bind_body_link_pose_w @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def body_link_vel_w(self) -> wp.array: """Body link velocity ``[lin_vel, ang_vel]`` in simulation world frame. @@ -293,6 +304,7 @@ def body_link_vel_w(self) -> wp.array: return self.root_link_vel_w.reshape((self._num_instances, 1)) @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def body_com_pose_w(self) -> wp.array: """Body center of mass pose ``[pos, quat]`` in simulation world frame. @@ -382,6 +394,7 @@ def body_com_pose_b(self) -> wp.array: """ @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def projected_gravity_b(self) -> wp.array: """Projection of the gravity direction on base frame. @@ -399,6 +412,7 @@ def projected_gravity_b(self) -> wp.array: return self._projected_gravity_b.data @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def heading_w(self) -> wp.array: """Yaw heading of the base frame (in radians). @@ -420,6 +434,7 @@ def heading_w(self) -> wp.array: return self._heading_w.data @property + @capture_unsafe(_LAZY_CAPTURE_REASON) def root_link_lin_vel_b(self) -> wp.array: """Root link linear velocity in base frame. diff --git a/source/isaaclab_newton/isaaclab_newton/kernels/state_kernels.py b/source/isaaclab_newton/isaaclab_newton/kernels/state_kernels.py new file mode 100644 index 000000000000..9bb7a39e3b2c --- /dev/null +++ b/source/isaaclab_newton/isaaclab_newton/kernels/state_kernels.py @@ -0,0 +1,30 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp helper functions for body-frame state computations. + +These ``@wp.func`` helpers are used by warp-first MDP terms (observations, +rewards) that need to project root-frame quantities into body frames. +""" + +import warp as wp + + +@wp.func +def rotate_vec_to_body_frame(vec_w: wp.vec3f, pose_w: wp.transformf) -> wp.vec3f: + """Rotate a world-frame vector into the body frame defined by pose_w.""" + return wp.quat_rotate_inv(wp.transform_get_rotation(pose_w), vec_w) + + +@wp.func +def body_lin_vel_from_root(pose_w: wp.transformf, vel_w: wp.spatial_vectorf) -> wp.vec3f: + """Extract body-frame linear velocity from root pose and spatial velocity.""" + return wp.quat_rotate_inv(wp.transform_get_rotation(pose_w), wp.spatial_top(vel_w)) + + +@wp.func +def body_ang_vel_from_root(pose_w: wp.transformf, vel_w: wp.spatial_vectorf) -> wp.vec3f: + """Extract body-frame angular velocity from root pose and spatial velocity.""" + return wp.quat_rotate_inv(wp.transform_get_rotation(pose_w), wp.spatial_bottom(vel_w)) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/__init__.py index 4781f141af49..79c13e2aa8f4 100644 --- a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/__init__.py +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/__init__.py @@ -3,4 +3,10 @@ # # SPDX-License-Identifier: BSD-3-Clause -"""Classic experimental task registrations (manager-based).""" +"""Classic environments for control. + +These environments are based on the MuJoCo environments provided by OpenAI. + +Reference: + https://github.com/openai/gym/tree/master/gym/envs/mujoco +""" diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/ant/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/ant/__init__.py new file mode 100644 index 000000000000..5f123abaa756 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/ant/__init__.py @@ -0,0 +1,30 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Ant locomotion environment (experimental manager-based entry point). +""" + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.classic.ant import agents + +## +# Register Gym environments. +## + +gym.register( + id="Isaac-Ant-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.ant_env_cfg:AntEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AntPPORunnerCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/ant/ant_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/ant/ant_env_cfg.py new file mode 100644 index 000000000000..106d1a78ba09 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/ant/ant_env_cfg.py @@ -0,0 +1,198 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +# Ant reuses humanoid's experimental MDP (mirrors stable pattern). +from isaaclab_experimental.managers import ObservationTermCfg as ObsTerm +from isaaclab_experimental.managers import RewardTermCfg as RewTerm +from isaaclab_experimental.managers import TerminationTermCfg as DoneTerm +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +import isaaclab.sim as sim_utils +from isaaclab.assets import AssetBaseCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim import SimulationCfg +from isaaclab.terrains import TerrainImporterCfg +from isaaclab.utils import configclass + +import isaaclab_tasks_experimental.manager_based.classic.humanoid.mdp as mdp + +## +# Pre-defined configs +## +from isaaclab_assets.robots.ant import ANT_CFG # isort: skip + + +@configclass +class MySceneCfg(InteractiveSceneCfg): + """Configuration for the terrain scene with an ant robot.""" + + # terrain + terrain = TerrainImporterCfg( + prim_path="/World/ground", + terrain_type="plane", + collision_group=-1, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="average", + restitution_combine_mode="average", + static_friction=1.0, + dynamic_friction=1.0, + restitution=0.0, + ), + debug_vis=False, + ) + + # robot + robot = ANT_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + # lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DistantLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0), + ) + + +## +# MDP settings +## + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + joint_effort = mdp.JointEffortActionCfg(asset_name="robot", joint_names=[".*"], scale=7.5) + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for the policy.""" + + base_height = ObsTerm(func=mdp.base_pos_z) + base_lin_vel = ObsTerm(func=mdp.base_lin_vel) + base_ang_vel = ObsTerm(func=mdp.base_ang_vel) + base_yaw_roll = ObsTerm(func=mdp.base_yaw_roll) + base_angle_to_target = ObsTerm(func=mdp.base_angle_to_target, params={"target_pos": (1000.0, 0.0, 0.0)}) + base_up_proj = ObsTerm(func=mdp.base_up_proj) + base_heading_proj = ObsTerm(func=mdp.base_heading_proj, params={"target_pos": (1000.0, 0.0, 0.0)}) + joint_pos_norm = ObsTerm(func=mdp.joint_pos_limit_normalized) + joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel, scale=0.2) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + reset_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={"pose_range": {}, "velocity_range": {}}, + ) + + reset_robot_joints = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "position_range": (-0.2, 0.2), + "velocity_range": (-0.1, 0.1), + }, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + # (1) Reward for moving forward + progress = RewTerm(func=mdp.progress_reward, weight=1.0, params={"target_pos": (1000.0, 0.0, 0.0)}) + # (2) Stay alive bonus + alive = RewTerm(func=mdp.is_alive, weight=0.5) + # (3) Reward for non-upright posture + upright = RewTerm(func=mdp.upright_posture_bonus, weight=0.1, params={"threshold": 0.93}) + # (4) Reward for moving in the right direction + move_to_target = RewTerm( + func=mdp.move_to_target_bonus, weight=0.5, params={"threshold": 0.8, "target_pos": (1000.0, 0.0, 0.0)} + ) + # (5) Penalty for large action commands + action_l2 = RewTerm(func=mdp.action_l2, weight=-0.005) + # (6) Penalty for energy consumption + energy = RewTerm(func=mdp.power_consumption, weight=-0.05, params={"gear_ratio": {".*": 15.0}}) + # (7) Penalty for reaching close to joint limits + joint_pos_limits = RewTerm( + func=mdp.joint_pos_limits_penalty_ratio, weight=-0.1, params={"threshold": 0.99, "gear_ratio": {".*": 15.0}} + ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + # (1) Terminate if the episode length is exceeded + time_out = DoneTerm(func=mdp.time_out, time_out=True) + # (2) Terminate if the robot falls + torso_height = DoneTerm(func=mdp.root_height_below_minimum, params={"minimum_height": 0.31}) + + +## +# Environment configuration +## + + +@configclass +class AntEnvCfg(ManagerBasedRLEnvCfg): + """Configuration for the MuJoCo-style Ant walking environment.""" + + # Simulation settings + sim: SimulationCfg = SimulationCfg( + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=38, + nconmax=15, + ls_iterations=10, + cone="pyramidal", + ls_parallel=True, + impratio=1, + integrator="implicitfast", + ), + num_substeps=1, + debug_mode=False, + ) + ) + + # Scene settings + scene: MySceneCfg = MySceneCfg(num_envs=4096, env_spacing=5.0, clone_in_fabric=True) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + + def __post_init__(self): + """Post initialization.""" + # general settings + self.decimation = 2 + self.episode_length_s = 16.0 + # simulation settings + self.sim.dt = 1 / 120.0 + self.sim.render_interval = self.decimation + # default friction material + self.sim.physics_material.static_friction = 1.0 + self.sim.physics_material.dynamic_friction = 1.0 diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/cartpole/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/cartpole/__init__.py index 17a4c5c03cd2..4e3324264949 100644 --- a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/cartpole/__init__.py +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/cartpole/__init__.py @@ -9,21 +9,22 @@ import gymnasium as gym +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.classic.cartpole import agents + +## +# Register Gym environments. +## + gym.register( id="Isaac-Cartpole-Warp-v0", entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", disable_env_checker=True, kwargs={ - # Use experimental Cartpole cfg (allows isolated modifications). - "env_cfg_entry_point": ( - "isaaclab_tasks_experimental.manager_based.classic.cartpole.cartpole_env_cfg:CartpoleEnvCfg" - ), - # Point agent configs to the existing task package. - "rl_games_cfg_entry_point": "isaaclab_tasks.manager_based.classic.cartpole.agents:rl_games_ppo_cfg.yaml", - "rsl_rl_cfg_entry_point": ( - "isaaclab_tasks.manager_based.classic.cartpole.agents.rsl_rl_ppo_cfg:CartpolePPORunnerCfg" - ), - "skrl_cfg_entry_point": "isaaclab_tasks.manager_based.classic.cartpole.agents:skrl_ppo_cfg.yaml", - "sb3_cfg_entry_point": "isaaclab_tasks.manager_based.classic.cartpole.agents:sb3_ppo_cfg.yaml", + "env_cfg_entry_point": f"{__name__}.cartpole_env_cfg:CartpoleEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:CartpolePPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", }, ) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/__init__.py new file mode 100644 index 000000000000..c08e5156b926 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/__init__.py @@ -0,0 +1,30 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Humanoid locomotion environment (experimental manager-based entry point). +""" + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.classic.humanoid import agents + +## +# Register Gym environments. +## + +gym.register( + id="Isaac-Humanoid-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.humanoid_env_cfg:HumanoidEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:HumanoidPPORunnerCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/humanoid_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/humanoid_env_cfg.py new file mode 100644 index 000000000000..781541a495e6 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/humanoid_env_cfg.py @@ -0,0 +1,233 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_experimental.managers import ObservationTermCfg as ObsTerm +from isaaclab_experimental.managers import RewardTermCfg as RewTerm +from isaaclab_experimental.managers import TerminationTermCfg as DoneTerm +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +import isaaclab.sim as sim_utils +from isaaclab.assets import AssetBaseCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim import SimulationCfg +from isaaclab.terrains import TerrainImporterCfg +from isaaclab.utils import configclass + +import isaaclab_tasks_experimental.manager_based.classic.humanoid.mdp as mdp + +from isaaclab_assets.robots.humanoid import HUMANOID_CFG # isort:skip + + +## +# Scene definition +## + + +@configclass +class MySceneCfg(InteractiveSceneCfg): + """Configuration for the terrain scene with a humanoid robot.""" + + # terrain + terrain = TerrainImporterCfg( + prim_path="/World/ground", + terrain_type="plane", + collision_group=-1, + physics_material=sim_utils.RigidBodyMaterialCfg(static_friction=1.0, dynamic_friction=1.0, restitution=0.0), + debug_vis=False, + ) + + # robot + robot = HUMANOID_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + + # lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DistantLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0), + ) + + +## +# MDP settings +## + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + joint_effort = mdp.JointEffortActionCfg( + asset_name="robot", + joint_names=[".*"], + scale={ + ".*_waist.*": 67.5, + ".*_upper_arm.*": 67.5, + "pelvis": 67.5, + ".*_lower_arm": 45.0, + ".*_thigh:0": 45.0, + ".*_thigh:1": 135.0, + ".*_thigh:2": 45.0, + ".*_shin": 90.0, + ".*_foot.*": 22.5, + }, + ) + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for the policy.""" + + base_height = ObsTerm(func=mdp.base_pos_z) + base_lin_vel = ObsTerm(func=mdp.base_lin_vel) + base_ang_vel = ObsTerm(func=mdp.base_ang_vel, scale=0.25) + base_yaw_roll = ObsTerm(func=mdp.base_yaw_roll) + base_angle_to_target = ObsTerm(func=mdp.base_angle_to_target, params={"target_pos": (1000.0, 0.0, 0.0)}) + base_up_proj = ObsTerm(func=mdp.base_up_proj) + base_heading_proj = ObsTerm(func=mdp.base_heading_proj, params={"target_pos": (1000.0, 0.0, 0.0)}) + joint_pos_norm = ObsTerm(func=mdp.joint_pos_limit_normalized) + joint_vel_rel = ObsTerm(func=mdp.joint_vel_rel, scale=0.1) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = False + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + reset_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={"pose_range": {}, "velocity_range": {}}, + ) + + reset_robot_joints = EventTerm( + func=mdp.reset_joints_by_offset, + mode="reset", + params={ + "position_range": (-0.2, 0.2), + "velocity_range": (-0.1, 0.1), + }, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + # (1) Reward for moving forward + progress = RewTerm(func=mdp.progress_reward, weight=1.0, params={"target_pos": (1000.0, 0.0, 0.0)}) + # (2) Stay alive bonus + alive = RewTerm(func=mdp.is_alive, weight=2.0) + # (3) Reward for non-upright posture + upright = RewTerm(func=mdp.upright_posture_bonus, weight=0.1, params={"threshold": 0.93}) + # (4) Reward for moving in the right direction + move_to_target = RewTerm( + func=mdp.move_to_target_bonus, weight=0.5, params={"threshold": 0.8, "target_pos": (1000.0, 0.0, 0.0)} + ) + # (5) Penalty for large action commands + action_l2 = RewTerm(func=mdp.action_l2, weight=-0.01) + # (6) Penalty for energy consumption + energy = RewTerm( + func=mdp.power_consumption, + weight=-0.005, + params={ + "gear_ratio": { + ".*_waist.*": 67.5, + ".*_upper_arm.*": 67.5, + "pelvis": 67.5, + ".*_lower_arm": 45.0, + ".*_thigh:0": 45.0, + ".*_thigh:1": 135.0, + ".*_thigh:2": 45.0, + ".*_shin": 90.0, + ".*_foot.*": 22.5, + } + }, + ) + # (7) Penalty for reaching close to joint limits + joint_pos_limits = RewTerm( + func=mdp.joint_pos_limits_penalty_ratio, + weight=-0.25, + params={ + "threshold": 0.98, + "gear_ratio": { + ".*_waist.*": 67.5, + ".*_upper_arm.*": 67.5, + "pelvis": 67.5, + ".*_lower_arm": 45.0, + ".*_thigh:0": 45.0, + ".*_thigh:1": 135.0, + ".*_thigh:2": 45.0, + ".*_shin": 90.0, + ".*_foot.*": 22.5, + }, + }, + ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + # (1) Terminate if the episode length is exceeded + time_out = DoneTerm(func=mdp.time_out, time_out=True) + # (2) Terminate if the robot falls + torso_height = DoneTerm(func=mdp.root_height_below_minimum, params={"minimum_height": 0.8}) + + +@configclass +class HumanoidEnvCfg(ManagerBasedRLEnvCfg): + """Configuration for the MuJoCo-style Humanoid walking environment.""" + + # Scene settings + scene: MySceneCfg = MySceneCfg(num_envs=4096, env_spacing=5.0, clone_in_fabric=True) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + + def __post_init__(self): + """Post initialization.""" + # general settings + self.decimation = 2 + self.episode_length_s = 16.0 + # simulation settings + self.sim: SimulationCfg = SimulationCfg( + dt=1 / 120.0, + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=80, + nconmax=25, + ls_iterations=15, + cone="pyramidal", + ls_parallel=True, + update_data_interval=2, + impratio=1, + integrator="implicitfast", + ), + num_substeps=2, + debug_mode=False, + ), + ) + # self.sim.dt = 1 / 120.0 + self.sim.render_interval = self.decimation + # default friction material + self.sim.physics_material.static_friction = 1.0 + self.sim.physics_material.dynamic_friction = 1.0 diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/mdp/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/mdp/__init__.py new file mode 100644 index 000000000000..df0802edf058 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/mdp/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the humanoid environments.""" + +from isaaclab_experimental.envs.mdp import * # noqa: F401, F403 + +from .observations import * # noqa: F401, F403 +from .rewards import * # noqa: F401, F403 diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/mdp/observations.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/mdp/observations.py new file mode 100644 index 000000000000..0210c407dc5f --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/mdp/observations.py @@ -0,0 +1,179 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp-first observation terms for the humanoid task. + +All observation functions follow the ``func(env, out, **params) -> None`` signature. +Dimensions are declared via ``out_dim`` on the ``@generic_io_descriptor_warp`` decorator. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import warp as wp +from isaaclab_experimental.envs.utils.io_descriptors import generic_io_descriptor_warp +from isaaclab_experimental.managers import SceneEntityCfg +from isaaclab_newton.kernels.state_kernels import rotate_vec_to_body_frame + +from isaaclab.assets import Articulation + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedEnv + + +@wp.kernel +def _base_yaw_roll_kernel( + root_quat_w: wp.array(dtype=wp.quatf), + out: wp.array(dtype=wp.float32, ndim=2), +): + """Extract yaw and roll angles from root quaternion (x, y, z, w layout).""" + i = wp.tid() + q = root_quat_w[i] + qx = q[0] + qy = q[1] + qz = q[2] + qw = q[3] + # roll = atan2(2*(qw*qx + qy*qz), 1 - 2*(qx^2 + qy^2)) + sin_roll = 2.0 * (qw * qx + qy * qz) + cos_roll = 1.0 - 2.0 * (qx * qx + qy * qy) + roll = wp.atan2(sin_roll, cos_roll) + # yaw = atan2(2*(qw*qz + qx*qy), 1 - 2*(qy^2 + qz^2)) + sin_yaw = 2.0 * (qw * qz + qx * qy) + cos_yaw = 1.0 - 2.0 * (qy * qy + qz * qz) + yaw = wp.atan2(sin_yaw, cos_yaw) + out[i, 0] = yaw + out[i, 1] = roll + + +@generic_io_descriptor_warp(out_dim=2, observation_type="RootState") +def base_yaw_roll(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Yaw and roll of the base in the simulation world frame. Shape: (num_envs, 2).""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_base_yaw_roll_kernel, + dim=env.num_envs, + inputs=[asset.data.root_quat_w, out], + device=env.device, + ) + + +# Inline Tier 1 access: derives projected gravity directly from root_link_pose_w, +# avoiding the lazy TimestampedWarpBuffer which is not CUDA-graph-capturable. +# See GRAPH_CAPTURE_MIGRATION.md in isaaclab_newton for background. + + +@wp.kernel +def _base_up_proj_kernel( + root_pose_w: wp.array(dtype=wp.transformf), + gravity_w: wp.array(dtype=wp.vec3f), + out: wp.array(dtype=wp.float32, ndim=2), +): + """Project base up vector onto world up: -gravity_b[2].""" + i = wp.tid() + out[i, 0] = -rotate_vec_to_body_frame(gravity_w[0], root_pose_w[i])[2] + + +@generic_io_descriptor_warp(out_dim=1, observation_type="RootState") +def base_up_proj(env: ManagerBasedEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Projection of the base up vector onto the world up vector. Shape: (num_envs, 1).""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_base_up_proj_kernel, + dim=env.num_envs, + inputs=[asset.data.root_link_pose_w, asset.data.GRAVITY_VEC_W, out], + device=env.device, + ) + + +@wp.kernel +def _base_heading_proj_kernel( + root_pos_w: wp.array(dtype=wp.vec3f), + root_quat_w: wp.array(dtype=wp.quatf), + target_x: float, + target_y: float, + target_z: float, + out: wp.array(dtype=wp.float32, ndim=2), +): + """Dot product between robot forward and direction to target.""" + i = wp.tid() + pos = root_pos_w[i] + q = root_quat_w[i] + # compute direction to target (zeroed z) + dx = target_x - pos[0] + dy = target_y - pos[1] + dist = wp.sqrt(dx * dx + dy * dy) + # avoid division by zero + inv_dist = wp.where(dist > 1.0e-6, 1.0 / dist, 0.0) + to_target_x = dx * inv_dist + to_target_y = dy * inv_dist + # compute forward vector via quaternion rotation of (1,0,0) + fwd = wp.quat_rotate(q, wp.vec3f(1.0, 0.0, 0.0)) + # dot product (xy only) + heading_proj = fwd[0] * to_target_x + fwd[1] * to_target_y + out[i, 0] = heading_proj + + +@generic_io_descriptor_warp(out_dim=1, observation_type="RootState") +def base_heading_proj( + env: ManagerBasedEnv, + out, + target_pos: tuple[float, float, float], + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +) -> None: + """Dot product between the base forward direction and direction to target. Shape: (num_envs, 1).""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_base_heading_proj_kernel, + dim=env.num_envs, + inputs=[asset.data.root_pos_w, asset.data.root_quat_w, target_pos[0], target_pos[1], target_pos[2], out], + device=env.device, + ) + + +@wp.kernel +def _base_angle_to_target_kernel( + root_pos_w: wp.array(dtype=wp.vec3f), + root_quat_w: wp.array(dtype=wp.quatf), + target_x: float, + target_y: float, + out: wp.array(dtype=wp.float32, ndim=2), +): + """Angle between base forward and vector to target, normalized to [-pi, pi].""" + i = wp.tid() + pos = root_pos_w[i] + q = root_quat_w[i] + # angle to target in world frame + dx = target_x - pos[0] + dy = target_y - pos[1] + walk_target_angle = wp.atan2(dy, dx) + # extract yaw from quaternion + qx = q[0] + qy = q[1] + qz = q[2] + qw = q[3] + sin_yaw = 2.0 * (qw * qz + qx * qy) + cos_yaw = 1.0 - 2.0 * (qy * qy + qz * qz) + yaw = wp.atan2(sin_yaw, cos_yaw) + # normalize to [-pi, pi] + angle = walk_target_angle - yaw + out[i, 0] = wp.atan2(wp.sin(angle), wp.cos(angle)) + + +@generic_io_descriptor_warp(out_dim=1, observation_type="RootState") +def base_angle_to_target( + env: ManagerBasedEnv, + out, + target_pos: tuple[float, float, float], + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +) -> None: + """Angle between the base forward vector and the vector to the target. Shape: (num_envs, 1).""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_base_angle_to_target_kernel, + dim=env.num_envs, + inputs=[asset.data.root_pos_w, asset.data.root_quat_w, target_pos[0], target_pos[1], out], + device=env.device, + ) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/mdp/rewards.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/mdp/rewards.py new file mode 100644 index 000000000000..e45ca6cac4d1 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/classic/humanoid/mdp/rewards.py @@ -0,0 +1,314 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp-first reward terms for the humanoid task. + +All reward functions follow the ``func(env, out, **params) -> None`` signature +where ``out`` is a pre-allocated Warp array of shape ``(num_envs,)`` with float32 dtype. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import torch +import warp as wp +from isaaclab_experimental.managers import SceneEntityCfg +from isaaclab_experimental.managers.manager_base import ManagerTermBase +from isaaclab_newton.kernels.state_kernels import rotate_vec_to_body_frame + +import isaaclab.utils.string as string_utils +from isaaclab.assets import Articulation + +if TYPE_CHECKING: + from isaaclab_experimental.managers.manager_term_cfg import RewardTermCfg + + from isaaclab.envs import ManagerBasedRLEnv + + +# --------------------------------------------------------------------------- +# Function-based reward terms +# --------------------------------------------------------------------------- + + +# Inline Tier 1 access: derives projected gravity directly from root_link_pose_w, +# avoiding the lazy TimestampedWarpBuffer which is not CUDA-graph-capturable. +# See GRAPH_CAPTURE_MIGRATION.md in isaaclab_newton for background. +# If ArticulationData Tier 2 lazy update is made graph-safe, this can revert to +# reading asset.data.projected_gravity_b directly. + + +@wp.kernel +def _upright_posture_bonus_kernel( + root_pose_w: wp.array(dtype=wp.transformf), + gravity_w: wp.array(dtype=wp.vec3f), + threshold: float, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + up_proj = -rotate_vec_to_body_frame(gravity_w[0], root_pose_w[i])[2] + out[i] = wp.where(up_proj > threshold, 1.0, 0.0) + + +def upright_posture_bonus( + env: ManagerBasedRLEnv, out, threshold: float, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot") +) -> None: + """Reward for maintaining an upright posture. Writes 1.0 if up_proj > threshold, else 0.0.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_upright_posture_bonus_kernel, + dim=env.num_envs, + inputs=[asset.data.root_link_pose_w, asset.data.GRAVITY_VEC_W, threshold, out], + device=env.device, + ) + + +@wp.kernel +def _move_to_target_bonus_kernel( + root_pos_w: wp.array(dtype=wp.vec3f), + root_quat_w: wp.array(dtype=wp.quatf), + target_x: float, + target_y: float, + threshold: float, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + pos = root_pos_w[i] + q = root_quat_w[i] + # direction to target + dx = target_x - pos[0] + dy = target_y - pos[1] + dist = wp.sqrt(dx * dx + dy * dy) + inv_dist = wp.where(dist > 1.0e-6, 1.0 / dist, 0.0) + to_target_x = dx * inv_dist + to_target_y = dy * inv_dist + # forward vector + fwd = wp.quat_rotate(q, wp.vec3f(1.0, 0.0, 0.0)) + heading_proj = fwd[0] * to_target_x + fwd[1] * to_target_y + out[i] = wp.where(heading_proj > threshold, 1.0, heading_proj / threshold) + + +def move_to_target_bonus( + env: ManagerBasedRLEnv, + out, + threshold: float, + target_pos: tuple[float, float, float], + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), +) -> None: + """Reward for heading towards the target.""" + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_move_to_target_bonus_kernel, + dim=env.num_envs, + inputs=[asset.data.root_pos_w, asset.data.root_quat_w, target_pos[0], target_pos[1], threshold, out], + device=env.device, + ) + + +# --------------------------------------------------------------------------- +# Class-based reward terms +# --------------------------------------------------------------------------- + + +@wp.kernel +def _progress_reward_reset_kernel( + env_mask: wp.array(dtype=wp.bool), + root_pos_w: wp.array(dtype=wp.vec3f), + target_x: float, + target_y: float, + target_z: float, + inv_step_dt: float, + potentials: wp.array(dtype=wp.float32), +): + i = wp.tid() + if env_mask[i]: + pos = root_pos_w[i] + dx = target_x - pos[0] + dy = target_y - pos[1] + dz = target_z - pos[2] + dist = wp.sqrt(dx * dx + dy * dy + dz * dz) + potentials[i] = -dist * inv_step_dt + + +@wp.kernel +def _progress_reward_kernel( + root_pos_w: wp.array(dtype=wp.vec3f), + target_x: float, + target_y: float, + inv_step_dt: float, + potentials: wp.array(dtype=wp.float32), + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + pos = root_pos_w[i] + dx = target_x - pos[0] + dy = target_y - pos[1] + # z component is zeroed (xy distance only, matching stable) + dist = wp.sqrt(dx * dx + dy * dy) + prev = potentials[i] + pot = -dist * inv_step_dt + potentials[i] = pot + out[i] = pot - prev + + +class progress_reward(ManagerTermBase): + """Reward for making progress towards the target (potential-based).""" + + def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): + super().__init__(cfg, env) + self.potentials = wp.zeros(env.num_envs, dtype=wp.float32, device=env.device) + self._target_pos = cfg.params["target_pos"] + + def reset(self, env_mask: wp.array | None = None) -> None: + if env_mask is None: + self.potentials.zero_() + return + asset: Articulation = self._env.scene["robot"] + inv_dt = 1.0 / self._env.step_dt + wp.launch( + kernel=_progress_reward_reset_kernel, + dim=self.num_envs, + inputs=[ + env_mask, + asset.data.root_pos_w, + self._target_pos[0], + self._target_pos[1], + self._target_pos[2], + inv_dt, + self.potentials, + ], + device=self.device, + ) + + def __call__( + self, + env: ManagerBasedRLEnv, + out, + target_pos: tuple[float, float, float], + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + ) -> None: + asset: Articulation = env.scene[asset_cfg.name] + inv_dt = 1.0 / env.step_dt + wp.launch( + kernel=_progress_reward_kernel, + dim=env.num_envs, + inputs=[asset.data.root_pos_w, target_pos[0], target_pos[1], inv_dt, self.potentials, out], + device=env.device, + ) + + +@wp.kernel +def _joint_pos_limits_penalty_ratio_kernel( + joint_pos: wp.array(dtype=wp.float32, ndim=2), + soft_limits: wp.array(dtype=wp.vec2f, ndim=2), + gear_ratio_scaled: wp.array(dtype=wp.float32, ndim=2), + threshold: float, + inv_range: float, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + n_joints = joint_pos.shape[1] + s = float(0.0) + for j in range(n_joints): + lim = soft_limits[i, j] + lower = lim.x + upper = lim.y + mid = (lower + upper) * 0.5 + half_range = (upper - lower) * 0.5 + scaled = float(0.0) + if half_range > 0.0: + scaled = (joint_pos[i, j] - mid) / half_range + abs_scaled = wp.abs(scaled) + if abs_scaled > threshold: + violation = (abs_scaled - threshold) * inv_range + s += violation * gear_ratio_scaled[i, j] + out[i] = s + + +class joint_pos_limits_penalty_ratio(ManagerTermBase): + """Penalty for violating joint position limits weighted by the gear ratio.""" + + def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): + asset_cfg = cfg.params.get("asset_cfg", SceneEntityCfg("robot")) + asset: Articulation = env.scene[asset_cfg.name] + + # resolve the gear ratio for each joint (torch in __init__ is fine) + gear_ratio = torch.ones(env.num_envs, asset.num_joints, device=env.device) + index_list, _, value_list = string_utils.resolve_matching_names_values( + cfg.params["gear_ratio"], asset.joint_names + ) + gear_ratio[:, index_list] = torch.tensor(value_list, device=env.device) + gear_ratio_scaled = gear_ratio / torch.max(gear_ratio) + self._gear_ratio_scaled_wp = wp.from_torch(gear_ratio_scaled) + + def __call__( + self, + env: ManagerBasedRLEnv, + out, + threshold: float, + gear_ratio: dict[str, float], + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + ) -> None: + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_joint_pos_limits_penalty_ratio_kernel, + dim=env.num_envs, + inputs=[ + asset.data.joint_pos, + asset.data.soft_joint_pos_limits, + self._gear_ratio_scaled_wp, + threshold, + 1.0 / (1.0 - threshold), + out, + ], + device=env.device, + ) + + +@wp.kernel +def _power_consumption_kernel( + action: wp.array(dtype=wp.float32, ndim=2), + joint_vel: wp.array(dtype=wp.float32, ndim=2), + gear_ratio_scaled: wp.array(dtype=wp.float32, ndim=2), + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + n_joints = action.shape[1] + s = float(0.0) + for j in range(n_joints): + s += wp.abs(action[i, j] * joint_vel[i, j] * gear_ratio_scaled[i, j]) + out[i] = s + + +class power_consumption(ManagerTermBase): + """Penalty for the power consumed by the actions to the environment.""" + + def __init__(self, cfg: RewardTermCfg, env: ManagerBasedRLEnv): + asset_cfg = cfg.params.get("asset_cfg", SceneEntityCfg("robot")) + asset: Articulation = env.scene[asset_cfg.name] + + # resolve the gear ratio for each joint (torch in __init__ is fine) + gear_ratio = torch.ones(env.num_envs, asset.num_joints, device=env.device) + index_list, _, value_list = string_utils.resolve_matching_names_values( + cfg.params["gear_ratio"], asset.joint_names + ) + gear_ratio[:, index_list] = torch.tensor(value_list, device=env.device) + gear_ratio_scaled = gear_ratio / torch.max(gear_ratio) + self._gear_ratio_scaled_wp = wp.from_torch(gear_ratio_scaled) + + def __call__( + self, + env: ManagerBasedRLEnv, + out, + gear_ratio: dict[str, float], + asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), + ) -> None: + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_power_consumption_kernel, + dim=env.num_envs, + inputs=[env.action_manager.action, asset.data.joint_vel, self._gear_ratio_scaled_wp, out], + device=env.device, + ) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/__init__.py new file mode 100644 index 000000000000..0660d38f0658 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Locomotion experimental task registrations (manager-based).""" diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/__init__.py new file mode 100644 index 000000000000..0857176a3fc7 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Velocity locomotion experimental task registrations (manager-based).""" diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/__init__.py new file mode 100644 index 000000000000..26f3257daef4 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Configurations for velocity-based locomotion environments.""" + +# We leave this file empty since we don't want to expose any configs in this package directly. +# We still need this file to import the "config" module in the parent package. diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/a1/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/a1/__init__.py new file mode 100644 index 000000000000..6c79524e8532 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/a1/__init__.py @@ -0,0 +1,37 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.locomotion.velocity.config.a1 import agents + +## +# Register Gym environments. +## + +gym.register( + id="Isaac-Velocity-Flat-Unitree-A1-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:UnitreeA1FlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UnitreeA1FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Velocity-Flat-Unitree-A1-Warp-Play-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:UnitreeA1FlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UnitreeA1FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{agents.__name__}:sb3_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/a1/flat_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/a1/flat_env_cfg.py new file mode 100644 index 000000000000..b27f8098d629 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/a1/flat_env_cfg.py @@ -0,0 +1,60 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + +from .rough_env_cfg import UnitreeA1RoughEnvCfg + + +@configclass +class UnitreeA1FlatEnvCfg(UnitreeA1RoughEnvCfg): + sim: SimulationCfg = SimulationCfg( + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=60, + nconmax=30, + cone="pyramidal", + impratio=1, + integrator="implicitfast", + ), + num_substeps=1, + debug_mode=False, + ) + ) + + def __post_init__(self): + # post init of parent + super().__post_init__() + + # override rewards + self.rewards.flat_orientation_l2.weight = -2.5 + self.rewards.feet_air_time.weight = 0.25 + + # change terrain to flat + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + # no height scan + # self.scene.height_scanner = None + # self.observations.policy.height_scan = None + # no terrain curriculum + self.curriculum.terrain_levels = None + + +class UnitreeA1FlatEnvCfg_PLAY(UnitreeA1FlatEnvCfg): + def __post_init__(self) -> None: + # post init of parent + super().__post_init__() + + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False + # remove random pushing event + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/a1/rough_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/a1/rough_env_cfg.py new file mode 100644 index 000000000000..03aec88a4018 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/a1/rough_env_cfg.py @@ -0,0 +1,91 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_experimental.managers import TerminationTermCfg as DoneTerm + +from isaaclab.utils import configclass + +import isaaclab_tasks_experimental.manager_based.locomotion.velocity.mdp as mdp +from isaaclab_tasks_experimental.manager_based.locomotion.velocity.velocity_env_cfg import ( + LocomotionVelocityRoughEnvCfg, + TerminationsCfg, +) + +from isaaclab_assets.robots.unitree import UNITREE_A1_CFG # isort: skip + + +class TerminationsCfg_A1(TerminationsCfg): + base_too_low = DoneTerm(func=mdp.root_height_below_minimum, params={"minimum_height": 0.2}) + + +@configclass +class UnitreeA1RoughEnvCfg(LocomotionVelocityRoughEnvCfg): + terminations: TerminationsCfg_A1 = TerminationsCfg_A1() + + def __post_init__(self): + # post init of parent + super().__post_init__() + + self.scene.robot = UNITREE_A1_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.scene.terrain.terrain_generator.sub_terrains["boxes"].grid_height_range = (0.025, 0.1) + self.scene.terrain.terrain_generator.sub_terrains["random_rough"].noise_range = (0.01, 0.06) + self.scene.terrain.terrain_generator.sub_terrains["random_rough"].noise_step = 0.01 + + # reduce action scale + self.actions.joint_pos.scale = 0.25 + + # event + self.events.push_robot = None + # TODO: TEMPORARILY DISABLED - adding this causes NaNs in the simulation + # self.events.add_base_mass.params["mass_distribution_params"] = (-1.0, 3.0) + # self.events.add_base_mass.params["asset_cfg"].body_names = "trunk" + self.events.base_external_force_torque.params["asset_cfg"].body_names = "trunk" + self.events.reset_robot_joints.params["position_range"] = (1.0, 1.0) + self.events.reset_base.params = { + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.0), + }, + } + self.events.base_com = None + + # rewards + self.rewards.feet_air_time.params["sensor_cfg"].body_names = ".*_foot" + self.rewards.feet_air_time.weight = 0.01 + self.rewards.undesired_contacts.params["sensor_cfg"].body_names = ".*thigh" + self.rewards.dof_torques_l2.weight = -0.0002 + self.rewards.track_lin_vel_xy_exp.weight = 1.5 + self.rewards.track_ang_vel_z_exp.weight = 0.75 + self.rewards.dof_acc_l2.weight = -2.5e-7 + self.terminations.base_contact.params["sensor_cfg"].body_names = "trunk" + + +@configclass +class UnitreeA1RoughEnvCfg_PLAY(UnitreeA1RoughEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # spawn the robot randomly in the grid (instead of their terrain levels) + self.scene.terrain.max_init_terrain_level = None + # reduce the number of terrains to save memory + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + + # disable randomization for play + self.observations.policy.enable_corruption = False + # remove random pushing event + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_b/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_b/__init__.py new file mode 100644 index 000000000000..cbbf5290e82a --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_b/__init__.py @@ -0,0 +1,37 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.locomotion.velocity.config.anymal_b import agents + +## +# Register Gym environments. +## + +gym.register( + id="Isaac-Velocity-Flat-Anymal-B-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalBFlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBFlatPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBFlatPPORunnerWithSymmetryCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Velocity-Flat-Anymal-B-Warp-Play-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalBFlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBFlatPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalBFlatPPORunnerWithSymmetryCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_b/flat_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_b/flat_env_cfg.py new file mode 100644 index 000000000000..28c0dc5c26b6 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_b/flat_env_cfg.py @@ -0,0 +1,60 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + +from .rough_env_cfg import AnymalBRoughEnvCfg + + +@configclass +class AnymalBFlatEnvCfg(AnymalBRoughEnvCfg): + sim: SimulationCfg = SimulationCfg( + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=75, + nconmax=15, + cone="elliptic", + impratio=100, + integrator="implicitfast", + ), + num_substeps=1, + debug_mode=False, + ) + ) + + def __post_init__(self): + # post init of parent + super().__post_init__() + + # override rewards + self.rewards.flat_orientation_l2.weight = -5.0 + self.rewards.dof_torques_l2.weight = -2.5e-5 + self.rewards.feet_air_time.weight = 0.5 + # change terrain to flat + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + # no height scan + self.scene.height_scanner = None + self.observations.policy.height_scan = None + # no terrain curriculum + self.curriculum.terrain_levels = None + + +class AnymalBFlatEnvCfg_PLAY(AnymalBFlatEnvCfg): + def __post_init__(self) -> None: + # post init of parent + super().__post_init__() + + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False + # remove random pushing event + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_b/rough_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_b/rough_env_cfg.py new file mode 100644 index 000000000000..9811356ef220 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_b/rough_env_cfg.py @@ -0,0 +1,34 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_tasks_experimental.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg + +from isaaclab_assets import ANYMAL_B_CFG # isort: skip + + +@configclass +class AnymalBRoughEnvCfg(LocomotionVelocityRoughEnvCfg): + def __post_init__(self): + super().__post_init__() + self.scene.robot = ANYMAL_B_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.manager_call_max_mode = {"Scene": 1} + + +@configclass +class AnymalBRoughEnvCfg_PLAY(AnymalBRoughEnvCfg): + def __post_init__(self): + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.scene.terrain.max_init_terrain_level = None + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_c/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_c/__init__.py new file mode 100644 index 000000000000..318b13cc4707 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_c/__init__.py @@ -0,0 +1,39 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.locomotion.velocity.config.anymal_c import agents + +## +# Register Gym environments. +## + +gym.register( + id="Isaac-Velocity-Flat-Anymal-C-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalCFlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCFlatPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCFlatPPORunnerWithSymmetryCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_flat_ppo_cfg.yaml", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Velocity-Flat-Anymal-C-Warp-Play-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalCFlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCFlatPPORunnerCfg", + "rsl_rl_with_symmetry_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalCFlatPPORunnerWithSymmetryCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_flat_ppo_cfg.yaml", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_c/flat_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_c/flat_env_cfg.py new file mode 100644 index 000000000000..e82cf9559d61 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_c/flat_env_cfg.py @@ -0,0 +1,52 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + +from .rough_env_cfg import AnymalCRoughEnvCfg + + +@configclass +class AnymalCFlatEnvCfg(AnymalCRoughEnvCfg): + sim: SimulationCfg = SimulationCfg( + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=120, + nconmax=15, + cone="elliptic", + impratio=100, + integrator="implicitfast", + ), + num_substeps=1, + debug_mode=False, + ) + ) + + def __post_init__(self): + # post init of parent + super().__post_init__() + + # override rewards + self.rewards.flat_orientation_l2.weight = -5.0 + self.rewards.dof_torques_l2.weight = -2.5e-5 + self.rewards.feet_air_time.weight = 0.5 + # change terrain to flat + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + # no terrain curriculum + self.curriculum.terrain_levels = None + + +class AnymalCFlatEnvCfg_PLAY(AnymalCFlatEnvCfg): + def __post_init__(self) -> None: + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_c/rough_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_c/rough_env_cfg.py new file mode 100644 index 000000000000..36af75613c07 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_c/rough_env_cfg.py @@ -0,0 +1,40 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_tasks_experimental.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg + +## +# Pre-defined configs +## +from isaaclab_assets.robots.anymal import ANYMAL_C_CFG # isort: skip + + +@configclass +class AnymalCRoughEnvCfg(LocomotionVelocityRoughEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # switch robot to anymal-c + self.scene.robot = ANYMAL_C_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.scene.robot.actuators["legs"].armature = 0.01 + self.manager_call_max_mode = {"Scene": 1} + + +@configclass +class AnymalCRoughEnvCfg_PLAY(AnymalCRoughEnvCfg): + def __post_init__(self): + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.scene.terrain.max_init_terrain_level = None + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_d/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_d/__init__.py new file mode 100644 index 000000000000..e5e75d19dc2d --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_d/__init__.py @@ -0,0 +1,60 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.locomotion.velocity.config.anymal_d import agents + +## +# Register Gym environments. +## + +# Rough env disabled: requires isaaclab_physx which is not yet available on dev/newton. +# The package exists on upstream/develop (commit 308400f1d35) but has not been merged. +# Re-enable once dev/newton picks up isaaclab_physx. +# gym.register( +# id="Isaac-Velocity-Rough-Anymal-D-Warp-v0", +# entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", +# disable_env_checker=True, +# kwargs={ +# "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalDRoughEnvCfg", +# "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg", +# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", +# }, +# ) + +# gym.register( +# id="Isaac-Velocity-Rough-Anymal-D-Warp-Play-v0", +# entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", +# disable_env_checker=True, +# kwargs={ +# "env_cfg_entry_point": f"{__name__}.rough_env_cfg:AnymalDRoughEnvCfg_PLAY", +# "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg", +# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", +# }, +# ) + +gym.register( + id="Isaac-Velocity-Flat-Anymal-D-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalDFlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Velocity-Flat-Anymal-D-Warp-Play-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:AnymalDFlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_d/flat_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_d/flat_env_cfg.py new file mode 100644 index 000000000000..1f98c1b56120 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_d/flat_env_cfg.py @@ -0,0 +1,46 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + +from .rough_env_cfg import AnymalDRoughEnvCfg + + +@configclass +class AnymalDFlatEnvCfg(AnymalDRoughEnvCfg): + sim: SimulationCfg = SimulationCfg( + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=60, + nconmax=25, + cone="elliptic", + impratio=100.0, + ), + num_substeps=1, + debug_mode=False, + ) + ) + + def __post_init__(self): + super().__post_init__() + self.rewards.flat_orientation_l2.weight = -5.0 + self.rewards.dof_torques_l2.weight = -2.5e-5 + self.rewards.feet_air_time.weight = 0.5 + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + self.curriculum.terrain_levels = None + + +class AnymalDFlatEnvCfg_PLAY(AnymalDFlatEnvCfg): + def __post_init__(self) -> None: + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_d/rough_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_d/rough_env_cfg.py new file mode 100644 index 000000000000..1cafa006ee64 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/anymal_d/rough_env_cfg.py @@ -0,0 +1,37 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_tasks_experimental.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg + +## +# Pre-defined configs +## +from isaaclab_assets.robots.anymal import ANYMAL_D_CFG # isort: skip + + +@configclass +class AnymalDRoughEnvCfg(LocomotionVelocityRoughEnvCfg): + def __post_init__(self): + super().__post_init__() + self.scene.robot = ANYMAL_D_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.manager_call_max_mode = {"Scene": 1} + + +@configclass +class AnymalDRoughEnvCfg_PLAY(AnymalDRoughEnvCfg): + def __post_init__(self): + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.scene.terrain.max_init_terrain_level = None + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/cassie/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/cassie/__init__.py new file mode 100644 index 000000000000..4d9d4a77883a --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/cassie/__init__.py @@ -0,0 +1,35 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.locomotion.velocity.config.cassie import agents + +## +# Register Gym environments. +## + +gym.register( + id="Isaac-Velocity-Flat-Cassie-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:CassieFlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:CassieFlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Velocity-Flat-Cassie-Warp-Play-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:CassieFlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:CassieFlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/cassie/flat_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/cassie/flat_env_cfg.py new file mode 100644 index 000000000000..bcc670cf3788 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/cassie/flat_env_cfg.py @@ -0,0 +1,45 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + +from .rough_env_cfg import CassieRoughEnvCfg + + +@configclass +class CassieFlatEnvCfg(CassieRoughEnvCfg): + sim: SimulationCfg = SimulationCfg( + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=52, + nconmax=15, + cone="pyramidal", + impratio=1, + integrator="implicitfast", + ), + num_substeps=1, + debug_mode=False, + ) + ) + + def __post_init__(self): + super().__post_init__() + self.rewards.flat_orientation_l2.weight = -2.5 + self.rewards.feet_air_time.weight = 5.0 + self.rewards.joint_deviation_hip.params["asset_cfg"].joint_names = ["hip_rotation_.*"] + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + self.curriculum.terrain_levels = None + + +class CassieFlatEnvCfg_PLAY(CassieFlatEnvCfg): + def __post_init__(self) -> None: + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.observations.policy.enable_corruption = False diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/cassie/rough_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/cassie/rough_env_cfg.py new file mode 100644 index 000000000000..43341babeb3d --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/cassie/rough_env_cfg.py @@ -0,0 +1,96 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_experimental.managers import RewardTermCfg as RewTerm +from isaaclab_experimental.managers import SceneEntityCfg + +from isaaclab.utils import configclass + +import isaaclab_tasks_experimental.manager_based.locomotion.velocity.mdp as mdp +from isaaclab_tasks_experimental.manager_based.locomotion.velocity.velocity_env_cfg import ( + LocomotionVelocityRoughEnvCfg, + RewardsCfg, +) + +from isaaclab_assets.robots.cassie import CASSIE_CFG # isort: skip + + +@configclass +class CassieRewardsCfg(RewardsCfg): + termination_penalty = RewTerm(func=mdp.is_terminated, weight=-200.0) + feet_air_time = RewTerm( + func=mdp.feet_air_time_positive_biped, + weight=2.5, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*toe"), + "command_name": "base_velocity", + "threshold": 0.3, + }, + ) + joint_deviation_hip = RewTerm( + func=mdp.joint_deviation_l1, + weight=-0.2, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=["hip_abduction_.*", "hip_rotation_.*"])}, + ) + joint_deviation_toes = RewTerm( + func=mdp.joint_deviation_l1, + weight=-0.2, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=["toe_joint_.*"])}, + ) + dof_pos_limits = RewTerm( + func=mdp.joint_pos_limits, + weight=-1.0, + params={"asset_cfg": SceneEntityCfg("robot", joint_names="toe_joint_.*")}, + ) + + +@configclass +class CassieRoughEnvCfg(LocomotionVelocityRoughEnvCfg): + rewards: CassieRewardsCfg = CassieRewardsCfg() + + def __post_init__(self): + super().__post_init__() + self.scene.robot = CASSIE_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.actions.joint_pos.scale = 0.5 + self.events.push_robot = None + # TODO: TEMPORARILY DISABLED - adding this causes NaNs in the simulation + # self.events.add_base_mass = None + self.events.reset_robot_joints.params["position_range"] = (1.0, 1.0) + self.events.base_external_force_torque.params["asset_cfg"].body_names = [".*pelvis"] + self.events.reset_base.params = { + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.0), + }, + } + self.events.base_com = None + self.terminations.base_contact.params["sensor_cfg"].body_names = [".*pelvis"] + self.rewards.undesired_contacts = None + self.rewards.dof_torques_l2.weight = -5.0e-6 + self.rewards.track_lin_vel_xy_exp.weight = 2.0 + self.rewards.track_ang_vel_z_exp.weight = 1.0 + self.rewards.action_rate_l2.weight *= 1.5 + self.rewards.dof_acc_l2.weight *= 1.5 + + +@configclass +class CassieRoughEnvCfg_PLAY(CassieRoughEnvCfg): + def __post_init__(self): + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.scene.terrain.max_init_terrain_level = None + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/g1/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/g1/__init__.py new file mode 100644 index 000000000000..83bdf047a488 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/g1/__init__.py @@ -0,0 +1,58 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.locomotion.velocity.config.g1 import agents + +## +# Register Gym environments. +## + +# gym.register( +# id="Isaac-Velocity-Rough-G1-Warp-v0", +# entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", +# disable_env_checker=True, +# kwargs={ +# "env_cfg_entry_point": f"{__name__}.rough_env_cfg:G1RoughEnvCfg", +# "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:G1RoughPPORunnerCfg", +# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", +# }, +# ) + + +# gym.register( +# id="Isaac-Velocity-Rough-G1-Warp-Play-v0", +# entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", +# disable_env_checker=True, +# kwargs={ +# "env_cfg_entry_point": f"{__name__}.rough_env_cfg:G1RoughEnvCfg_PLAY", +# "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:G1RoughPPORunnerCfg", +# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", +# }, +# ) + +gym.register( + id="Isaac-Velocity-Flat-G1-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:G1FlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:G1FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Velocity-Flat-G1-Warp-Play-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:G1FlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:G1FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/g1/flat_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/g1/flat_env_cfg.py new file mode 100644 index 000000000000..3d9047a98506 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/g1/flat_env_cfg.py @@ -0,0 +1,58 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_experimental.managers import SceneEntityCfg +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + +from .rough_env_cfg import G1RoughEnvCfg + + +@configclass +class G1FlatEnvCfg(G1RoughEnvCfg): + sim: SimulationCfg = SimulationCfg( + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=95, + nconmax=10, + cone="pyramidal", + impratio=1, + integrator="implicitfast", + ), + num_substeps=1, + debug_mode=False, + ) + ) + + def __post_init__(self): + super().__post_init__() + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + self.curriculum.terrain_levels = None + self.rewards.track_ang_vel_z_exp.weight = 1.0 + self.rewards.lin_vel_z_l2.weight = -0.2 + self.rewards.action_rate_l2.weight = -0.005 + self.rewards.dof_acc_l2.weight = -1.0e-7 + self.rewards.feet_air_time.weight = 0.75 + self.rewards.feet_air_time.params["threshold"] = 0.4 + self.rewards.dof_torques_l2.weight = -2.0e-6 + self.rewards.dof_torques_l2.params["asset_cfg"] = SceneEntityCfg( + "robot", joint_names=[".*_hip_.*", ".*_knee_joint"] + ) + self.commands.base_velocity.ranges.lin_vel_x = (0.0, 1.0) + self.commands.base_velocity.ranges.lin_vel_y = (-0.5, 0.5) + self.commands.base_velocity.ranges.ang_vel_z = (-1.0, 1.0) + + +class G1FlatEnvCfg_PLAY(G1FlatEnvCfg): + def __post_init__(self) -> None: + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/g1/rough_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/g1/rough_env_cfg.py new file mode 100644 index 000000000000..db4cc159e4c5 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/g1/rough_env_cfg.py @@ -0,0 +1,178 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_experimental.managers import RewardTermCfg as RewTerm +from isaaclab_experimental.managers import SceneEntityCfg + +from isaaclab.utils import configclass + +import isaaclab_tasks_experimental.manager_based.locomotion.velocity.mdp as mdp +from isaaclab_tasks_experimental.manager_based.locomotion.velocity.velocity_env_cfg import ( + LocomotionVelocityRoughEnvCfg, + RewardsCfg, +) + +## +# Pre-defined configs +## +from isaaclab_assets import G1_MINIMAL_CFG # isort: skip + + +@configclass +class G1Rewards(RewardsCfg): + """Reward terms for the MDP.""" + + termination_penalty = RewTerm(func=mdp.is_terminated, weight=-200.0) + track_lin_vel_xy_exp = RewTerm( + func=mdp.track_lin_vel_xy_yaw_frame_exp, + weight=1.0, + params={"command_name": "base_velocity", "std": 0.5}, + ) + track_ang_vel_z_exp = RewTerm( + func=mdp.track_ang_vel_z_world_exp, weight=2.0, params={"command_name": "base_velocity", "std": 0.5} + ) + feet_air_time = RewTerm( + func=mdp.feet_air_time_positive_biped, + weight=0.25, + params={ + "command_name": "base_velocity", + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_ankle_roll_link"), + "threshold": 0.4, + }, + ) + feet_slide = RewTerm( + func=mdp.feet_slide, + weight=-0.1, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*_ankle_roll_link"), + "asset_cfg": SceneEntityCfg("robot", body_names=".*_ankle_roll_link"), + }, + ) + dof_pos_limits = RewTerm( + func=mdp.joint_pos_limits, + weight=-1.0, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_ankle_pitch_joint", ".*_ankle_roll_joint"])}, + ) + joint_deviation_hip = RewTerm( + func=mdp.joint_deviation_l1, + weight=-0.1, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_hip_yaw_joint", ".*_hip_roll_joint"])}, + ) + joint_deviation_arms = RewTerm( + func=mdp.joint_deviation_l1, + weight=-0.1, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + ".*_shoulder_pitch_joint", + ".*_shoulder_roll_joint", + ".*_shoulder_yaw_joint", + ".*_elbow_pitch_joint", + ".*_elbow_roll_joint", + ], + ) + }, + ) + joint_deviation_fingers = RewTerm( + func=mdp.joint_deviation_l1, + weight=-0.05, + params={ + "asset_cfg": SceneEntityCfg( + "robot", + joint_names=[ + ".*_five_joint", + ".*_three_joint", + ".*_six_joint", + ".*_four_joint", + ".*_zero_joint", + ".*_one_joint", + ".*_two_joint", + ], + ) + }, + ) + joint_deviation_torso = RewTerm( + func=mdp.joint_deviation_l1, + weight=-0.1, + params={"asset_cfg": SceneEntityCfg("robot", joint_names="torso_joint")}, + ) + + +@configclass +class G1RoughEnvCfg(LocomotionVelocityRoughEnvCfg): + rewards: G1Rewards = G1Rewards() + + def __post_init__(self): + super().__post_init__() + self.scene.robot = G1_MINIMAL_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.events.push_robot = None + # TODO: TEMPORARILY DISABLED - adding this causes NaNs in the simulation + # self.events.add_base_mass = None + self.events.reset_robot_joints.params["position_range"] = (1.0, 1.0) + self.events.base_external_force_torque.params["asset_cfg"].body_names = ["torso_link"] + self.events.reset_base.params = { + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.0), + }, + } + self.events.base_com = None + + # Rewards + self.rewards.lin_vel_z_l2.weight = 0.0 + self.rewards.undesired_contacts = None + self.rewards.flat_orientation_l2.weight = -1.0 + self.rewards.action_rate_l2.weight = -0.005 + self.rewards.dof_acc_l2.weight = -1.25e-7 + self.rewards.dof_acc_l2.params["asset_cfg"] = SceneEntityCfg( + "robot", joint_names=[".*_hip_.*", ".*_knee_joint"] + ) + self.rewards.dof_torques_l2.weight = -1.5e-7 + self.rewards.dof_torques_l2.params["asset_cfg"] = SceneEntityCfg( + "robot", joint_names=[".*_hip_.*", ".*_knee_joint", ".*_ankle_.*"] + ) + + # Commands + self.commands.base_velocity.ranges.lin_vel_x = (0.0, 1.0) + self.commands.base_velocity.ranges.lin_vel_y = (-0.0, 0.0) + self.commands.base_velocity.ranges.ang_vel_z = (-1.0, 1.0) + + # terminations + self.terminations.base_contact.params["sensor_cfg"].body_names = "torso_link" + + +@configclass +class G1RoughEnvCfg_PLAY(G1RoughEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.episode_length_s = 40.0 + # spawn the robot randomly in the grid (instead of their terrain levels) + self.scene.terrain.max_init_terrain_level = None + # reduce the number of terrains to save memory + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + + self.commands.base_velocity.ranges.lin_vel_x = (1.0, 1.0) + self.commands.base_velocity.ranges.lin_vel_y = (0.0, 0.0) + self.commands.base_velocity.ranges.ang_vel_z = (-1.0, 1.0) + self.commands.base_velocity.ranges.heading = (0.0, 0.0) + # disable randomization for play + self.observations.policy.enable_corruption = False + # remove random pushing + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go1/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go1/__init__.py new file mode 100644 index 000000000000..038f55740728 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go1/__init__.py @@ -0,0 +1,35 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.locomotion.velocity.config.go1 import agents + +## +# Register Gym environments. +## + +gym.register( + id="Isaac-Velocity-Flat-Unitree-Go1-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:UnitreeGo1FlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UnitreeGo1FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Velocity-Flat-Unitree-Go1-Warp-Play-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:UnitreeGo1FlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UnitreeGo1FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go1/flat_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go1/flat_env_cfg.py new file mode 100644 index 000000000000..e4fbc73e1d08 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go1/flat_env_cfg.py @@ -0,0 +1,46 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + +from .rough_env_cfg import UnitreeGo1RoughEnvCfg + + +@configclass +class UnitreeGo1FlatEnvCfg(UnitreeGo1RoughEnvCfg): + sim: SimulationCfg = SimulationCfg( + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=60, + nconmax=25, + cone="pyramidal", + impratio=1, + integrator="implicitfast", + ), + num_substeps=1, + debug_mode=False, + ) + ) + + def __post_init__(self): + super().__post_init__() + self.rewards.flat_orientation_l2.weight = -2.5 + self.rewards.feet_air_time.weight = 0.25 + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + self.curriculum.terrain_levels = None + + +class UnitreeGo1FlatEnvCfg_PLAY(UnitreeGo1FlatEnvCfg): + def __post_init__(self) -> None: + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go1/rough_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go1/rough_env_cfg.py new file mode 100644 index 000000000000..2864bbba3130 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go1/rough_env_cfg.py @@ -0,0 +1,61 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_tasks_experimental.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg + +from isaaclab_assets.robots.unitree import UNITREE_GO1_CFG # isort: skip + + +@configclass +class UnitreeGo1RoughEnvCfg(LocomotionVelocityRoughEnvCfg): + def __post_init__(self): + super().__post_init__() + self.scene.robot = UNITREE_GO1_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.manager_call_max_mode = {"Scene": 1} + self.scene.terrain.terrain_generator.sub_terrains["boxes"].grid_height_range = (0.025, 0.1) + self.scene.terrain.terrain_generator.sub_terrains["random_rough"].noise_range = (0.01, 0.06) + self.scene.terrain.terrain_generator.sub_terrains["random_rough"].noise_step = 0.01 + self.actions.joint_pos.scale = 0.25 + self.events.push_robot = None + self.events.base_external_force_torque.params["asset_cfg"].body_names = "trunk" + self.events.reset_robot_joints.params["position_range"] = (1.0, 1.0) + self.events.reset_base.params = { + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.0), + }, + } + self.events.base_com = None + self.rewards.feet_air_time.params["sensor_cfg"].body_names = ".*_foot" + self.rewards.feet_air_time.weight = 0.01 + self.rewards.undesired_contacts = None + self.rewards.dof_torques_l2.weight = -0.0002 + self.rewards.track_lin_vel_xy_exp.weight = 1.5 + self.rewards.track_ang_vel_z_exp.weight = 0.75 + self.rewards.dof_acc_l2.weight = -2.5e-7 + self.terminations.base_contact.params["sensor_cfg"].body_names = "trunk" + + +@configclass +class UnitreeGo1RoughEnvCfg_PLAY(UnitreeGo1RoughEnvCfg): + def __post_init__(self): + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.scene.terrain.max_init_terrain_level = None + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go2/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go2/__init__.py new file mode 100644 index 000000000000..7e124029c682 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go2/__init__.py @@ -0,0 +1,35 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.locomotion.velocity.config.go2 import agents + +## +# Register Gym environments. +## + +gym.register( + id="Isaac-Velocity-Flat-Unitree-Go2-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:UnitreeGo2FlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UnitreeGo2FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Velocity-Flat-Unitree-Go2-Warp-Play-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:UnitreeGo2FlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UnitreeGo2FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go2/flat_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go2/flat_env_cfg.py new file mode 100644 index 000000000000..ad8a8aa862f0 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go2/flat_env_cfg.py @@ -0,0 +1,46 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + +from .rough_env_cfg import UnitreeGo2RoughEnvCfg + + +@configclass +class UnitreeGo2FlatEnvCfg(UnitreeGo2RoughEnvCfg): + sim: SimulationCfg = SimulationCfg( + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=65, + nconmax=35, + cone="pyramidal", + impratio=1, + integrator="implicitfast", + ), + num_substeps=1, + debug_mode=False, + ) + ) + + def __post_init__(self): + super().__post_init__() + self.rewards.flat_orientation_l2.weight = -2.5 + self.rewards.feet_air_time.weight = 0.25 + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + self.curriculum.terrain_levels = None + + +class UnitreeGo2FlatEnvCfg_PLAY(UnitreeGo2FlatEnvCfg): + def __post_init__(self) -> None: + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go2/rough_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go2/rough_env_cfg.py new file mode 100644 index 000000000000..ff13b7e86176 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/go2/rough_env_cfg.py @@ -0,0 +1,60 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab.utils import configclass + +from isaaclab_tasks_experimental.manager_based.locomotion.velocity.velocity_env_cfg import LocomotionVelocityRoughEnvCfg + +from isaaclab_assets.robots.unitree import UNITREE_GO2_CFG # isort: skip + + +@configclass +class UnitreeGo2RoughEnvCfg(LocomotionVelocityRoughEnvCfg): + def __post_init__(self): + super().__post_init__() + self.scene.robot = UNITREE_GO2_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.scene.terrain.terrain_generator.sub_terrains["boxes"].grid_height_range = (0.025, 0.1) + self.scene.terrain.terrain_generator.sub_terrains["random_rough"].noise_range = (0.01, 0.06) + self.scene.terrain.terrain_generator.sub_terrains["random_rough"].noise_step = 0.01 + self.actions.joint_pos.scale = 0.25 + self.events.push_robot = None + self.events.base_external_force_torque.params["asset_cfg"].body_names = "base" + self.events.reset_robot_joints.params["position_range"] = (1.0, 1.0) + self.events.reset_base.params = { + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.0), + }, + } + self.events.base_com = None + self.rewards.feet_air_time.params["sensor_cfg"].body_names = ".*_foot" + self.rewards.feet_air_time.weight = 0.01 + self.rewards.undesired_contacts = None + self.rewards.dof_torques_l2.weight = -0.0002 + self.rewards.track_lin_vel_xy_exp.weight = 1.5 + self.rewards.track_ang_vel_z_exp.weight = 0.75 + self.rewards.dof_acc_l2.weight = -2.5e-7 + self.terminations.base_contact.params["sensor_cfg"].body_names = "base" + + +@configclass +class UnitreeGo2RoughEnvCfg_PLAY(UnitreeGo2RoughEnvCfg): + def __post_init__(self): + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.scene.terrain.max_init_terrain_level = None + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/h1/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/h1/__init__.py new file mode 100644 index 000000000000..95a1e8f29e3f --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/h1/__init__.py @@ -0,0 +1,57 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.locomotion.velocity.config.h1 import agents + +## +# Register Gym environments. +## + +# gym.register( +# id="Isaac-Velocity-Rough-H1-Warp-v0", +# entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", +# disable_env_checker=True, +# kwargs={ +# "env_cfg_entry_point": f"{__name__}.rough_env_cfg:H1RoughEnvCfg", +# "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:H1RoughPPORunnerCfg", +# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", +# }, +# ) + +# gym.register( +# id="Isaac-Velocity-Rough-H1-Warp-Play-v0", +# entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", +# disable_env_checker=True, +# kwargs={ +# "env_cfg_entry_point": f"{__name__}.rough_env_cfg:H1RoughEnvCfg_PLAY", +# "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:H1RoughPPORunnerCfg", +# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", +# }, +# ) + +gym.register( + id="Isaac-Velocity-Flat-H1-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:H1FlatEnvCfg", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:H1FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Velocity-Flat-H1-Warp-Play-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.flat_env_cfg:H1FlatEnvCfg_PLAY", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:H1FlatPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/h1/flat_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/h1/flat_env_cfg.py new file mode 100644 index 000000000000..22648c27a2c5 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/h1/flat_env_cfg.py @@ -0,0 +1,46 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + +from .rough_env_cfg import H1RoughEnvCfg + + +@configclass +class H1FlatEnvCfg(H1RoughEnvCfg): + sim: SimulationCfg = SimulationCfg( + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=65, + nconmax=15, + cone="pyramidal", + impratio=1, + integrator="implicitfast", + ), + num_substeps=1, + debug_mode=False, + ) + ) + + def __post_init__(self): + super().__post_init__() + self.scene.terrain.terrain_type = "plane" + self.scene.terrain.terrain_generator = None + self.curriculum.terrain_levels = None + self.rewards.feet_air_time.weight = 1.0 + self.rewards.feet_air_time.params["threshold"] = 0.6 + + +class H1FlatEnvCfg_PLAY(H1FlatEnvCfg): + def __post_init__(self) -> None: + super().__post_init__() + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.observations.policy.enable_corruption = False + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/h1/rough_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/h1/rough_env_cfg.py new file mode 100644 index 000000000000..edb71956a3fc --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/config/h1/rough_env_cfg.py @@ -0,0 +1,131 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from isaaclab_experimental.managers import RewardTermCfg as RewTerm +from isaaclab_experimental.managers import SceneEntityCfg + +from isaaclab.utils import configclass + +import isaaclab_tasks_experimental.manager_based.locomotion.velocity.mdp as mdp +from isaaclab_tasks_experimental.manager_based.locomotion.velocity.velocity_env_cfg import ( + LocomotionVelocityRoughEnvCfg, + RewardsCfg, +) + +## +# Pre-defined configs +## +from isaaclab_assets import H1_MINIMAL_CFG # isort: skip + + +@configclass +class H1Rewards(RewardsCfg): + """Reward terms for the MDP.""" + + termination_penalty = RewTerm(func=mdp.is_terminated, weight=-200.0) + lin_vel_z_l2 = None + track_lin_vel_xy_exp = RewTerm( + func=mdp.track_lin_vel_xy_yaw_frame_exp, + weight=1.0, + params={"command_name": "base_velocity", "std": 0.5}, + ) + track_ang_vel_z_exp = RewTerm( + func=mdp.track_ang_vel_z_world_exp, weight=1.0, params={"command_name": "base_velocity", "std": 0.5} + ) + feet_air_time = RewTerm( + func=mdp.feet_air_time_positive_biped, + weight=0.25, + params={ + "command_name": "base_velocity", + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*ankle_link"), + "threshold": 0.4, + }, + ) + feet_slide = RewTerm( + func=mdp.feet_slide, + weight=-0.25, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*ankle_link"), + "asset_cfg": SceneEntityCfg("robot", body_names=".*ankle_link"), + }, + ) + dof_pos_limits = RewTerm( + func=mdp.joint_pos_limits, weight=-1.0, params={"asset_cfg": SceneEntityCfg("robot", joint_names=".*_ankle")} + ) + joint_deviation_hip = RewTerm( + func=mdp.joint_deviation_l1, + weight=-0.2, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_hip_yaw", ".*_hip_roll"])}, + ) + joint_deviation_arms = RewTerm( + func=mdp.joint_deviation_l1, + weight=-0.2, + params={"asset_cfg": SceneEntityCfg("robot", joint_names=[".*_shoulder_.*", ".*_elbow"])}, + ) + joint_deviation_torso = RewTerm( + func=mdp.joint_deviation_l1, weight=-0.1, params={"asset_cfg": SceneEntityCfg("robot", joint_names="torso")} + ) + + +@configclass +class H1RoughEnvCfg(LocomotionVelocityRoughEnvCfg): + rewards: H1Rewards = H1Rewards() + + def __post_init__(self): + super().__post_init__() + self.scene.robot = H1_MINIMAL_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + self.events.push_robot = None + self.events.reset_robot_joints.params["position_range"] = (1.0, 1.0) + self.events.base_external_force_torque.params["asset_cfg"].body_names = [".*torso_link"] + self.events.reset_base.params = { + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (0.0, 0.0), + "y": (0.0, 0.0), + "z": (0.0, 0.0), + "roll": (0.0, 0.0), + "pitch": (0.0, 0.0), + "yaw": (0.0, 0.0), + }, + } + self.events.base_com = None + self.rewards.undesired_contacts = None + self.rewards.flat_orientation_l2.weight = -1.0 + self.rewards.dof_torques_l2.weight = 0.0 + self.rewards.action_rate_l2.weight = -0.005 + self.rewards.dof_acc_l2.weight = -1.25e-7 + self.commands.base_velocity.ranges.lin_vel_x = (0.0, 1.0) + self.commands.base_velocity.ranges.lin_vel_y = (0.0, 0.0) + self.commands.base_velocity.ranges.ang_vel_z = (-1.0, 1.0) + self.terminations.base_contact.params["sensor_cfg"].body_names = ".*torso_link" + + +@configclass +class H1RoughEnvCfg_PLAY(H1RoughEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + self.episode_length_s = 40.0 + # spawn the robot randomly in the grid (instead of their terrain levels) + self.scene.terrain.max_init_terrain_level = None + # reduce the number of terrains to save memory + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.num_rows = 5 + self.scene.terrain.terrain_generator.num_cols = 5 + self.scene.terrain.terrain_generator.curriculum = False + + self.commands.base_velocity.ranges.lin_vel_x = (1.0, 1.0) + self.commands.base_velocity.ranges.lin_vel_y = (0.0, 0.0) + self.commands.base_velocity.ranges.ang_vel_z = (-1.0, 1.0) + self.commands.base_velocity.ranges.heading = (0.0, 0.0) + # disable randomization for play + self.observations.policy.enable_corruption = False + # remove random pushing + self.events.base_external_force_torque = None + self.events.push_robot = None diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/__init__.py new file mode 100644 index 000000000000..cdc532db4255 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the locomotion environments.""" + +from isaaclab_experimental.envs.mdp import * # noqa: F401, F403 + +from .curriculums import * # noqa: F401, F403 +from .rewards import * # noqa: F401, F403 +from .terminations import * # noqa: F401, F403 diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/curriculums.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/curriculums.py new file mode 100644 index 000000000000..1ed0a4c6f33c --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/curriculums.py @@ -0,0 +1,40 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Curriculum functions for the velocity locomotion environment. + +Curriculum terms are not warp-managed (they run at reset time, not per-step), +so they remain torch-based. +""" + +from __future__ import annotations + +from collections.abc import Sequence +from typing import TYPE_CHECKING + +import torch +import warp as wp + +from isaaclab.assets import Articulation +from isaaclab.managers import SceneEntityCfg +from isaaclab.terrains import TerrainImporter + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +def terrain_levels_vel( + env: ManagerBasedRLEnv, env_ids: Sequence[int], asset_cfg: SceneEntityCfg = SceneEntityCfg("robot") +) -> torch.Tensor: + """Curriculum based on the distance the robot walked when commanded to move at a desired velocity.""" + asset: Articulation = env.scene[asset_cfg.name] + terrain: TerrainImporter = env.scene.terrain + command = env.command_manager.get_command("base_velocity") + distance = torch.norm(wp.to_torch(asset.data.root_pos_w)[env_ids, :2] - env.scene.env_origins[env_ids, :2], dim=1) + move_up = distance > terrain.cfg.terrain_generator.size[0] / 2 + move_down = distance < torch.norm(command[env_ids, :2], dim=1) * env.max_episode_length_s * 0.5 + move_down *= ~move_up + terrain.update_env_origins(env_ids, move_up, move_down) + return torch.mean(terrain.terrain_levels.float()) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/rewards.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/rewards.py new file mode 100644 index 000000000000..5bf6de7a414b --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/rewards.py @@ -0,0 +1,312 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp-first reward functions for the velocity locomotion environment. + +All functions follow the ``func(env, out, **params) -> None`` signature. +Cross-manager torch tensors (contact sensor, commands) are cached as zero-copy +warp views on first call via ``wp.from_torch``. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import warp as wp + +from isaaclab.managers import SceneEntityCfg +from isaaclab.sensors import ContactSensor + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + +# --------------------------------------------------------------------------- +# feet_air_time +# --------------------------------------------------------------------------- + + +@wp.kernel +def _feet_air_time_kernel( + last_air_time: wp.array(dtype=wp.float32, ndim=2), + first_contact: wp.array(dtype=wp.float32, ndim=2), + body_ids: wp.array(dtype=wp.int32), + cmd_xy: wp.array(dtype=wp.float32, ndim=2), + threshold: float, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + s = float(0.0) + for k in range(body_ids.shape[0]): + b = body_ids[k] + s += (last_air_time[i, b] - threshold) * first_contact[i, b] + # gate by command magnitude + cx = cmd_xy[i, 0] + cy = cmd_xy[i, 1] + cmd_norm = wp.sqrt(cx * cx + cy * cy) + out[i] = wp.where(cmd_norm > 0.1, s, 0.0) + + +def feet_air_time(env: ManagerBasedRLEnv, out, command_name: str, sensor_cfg: SceneEntityCfg, threshold: float) -> None: + """Reward long steps taken by the feet using L2-kernel.""" + fn = feet_air_time + contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] + # Cache command bridge (persistent pointer) + if not getattr(fn, "_is_warmed_up", False) or fn._cmd_name != command_name: + cmd = env.command_manager.get_command(command_name) + fn._cmd_wp = cmd if isinstance(cmd, wp.array) else wp.from_torch(cmd) + fn._cmd_name = command_name + fn._is_warmed_up = True + # Newton contact sensor returns persistent wp.arrays — use directly, no wp.from_torch needed + first_contact = contact_sensor.compute_first_contact(env.step_dt) + wp.launch( + kernel=_feet_air_time_kernel, + dim=env.num_envs, + inputs=[contact_sensor.data.last_air_time, first_contact, sensor_cfg.body_ids_wp, fn._cmd_wp, threshold, out], + device=env.device, + ) + + +# --------------------------------------------------------------------------- +# feet_air_time_positive_biped +# --------------------------------------------------------------------------- + + +@wp.kernel +def _feet_air_time_positive_biped_kernel( + air_time: wp.array(dtype=wp.float32, ndim=2), + contact_time: wp.array(dtype=wp.float32, ndim=2), + body_ids: wp.array(dtype=wp.int32), + cmd_xy: wp.array(dtype=wp.float32, ndim=2), + threshold: float, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + n_feet = body_ids.shape[0] + # count feet in contact and find single-stance min mode time + n_contact = int(0) + for k in range(n_feet): + b = body_ids[k] + if contact_time[i, b] > 0.0: + n_contact += 1 + single_stance = n_contact == 1 + min_val = threshold # clamp upper bound + for k in range(n_feet): + b = body_ids[k] + in_contact = contact_time[i, b] > 0.0 + mode_time = wp.where(in_contact, contact_time[i, b], air_time[i, b]) + val = wp.where(single_stance, mode_time, 0.0) + min_val = wp.min(min_val, val) + # gate by command magnitude + cx = cmd_xy[i, 0] + cy = cmd_xy[i, 1] + cmd_norm = wp.sqrt(cx * cx + cy * cy) + out[i] = wp.where(cmd_norm > 0.1, min_val, 0.0) + + +def feet_air_time_positive_biped(env, out, command_name: str, threshold: float, sensor_cfg: SceneEntityCfg) -> None: + """Reward long steps taken by the feet for bipeds.""" + fn = feet_air_time_positive_biped + contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] + if not getattr(fn, "_is_warmed_up", False) or fn._cmd_name != command_name: + cmd = env.command_manager.get_command(command_name) + fn._cmd_wp = cmd if isinstance(cmd, wp.array) else wp.from_torch(cmd) + fn._cmd_name = command_name + fn._is_warmed_up = True + wp.launch( + kernel=_feet_air_time_positive_biped_kernel, + dim=env.num_envs, + inputs=[ + contact_sensor.data.current_air_time, + contact_sensor.data.current_contact_time, + sensor_cfg.body_ids_wp, + fn._cmd_wp, + threshold, + out, + ], + device=env.device, + ) + + +# --------------------------------------------------------------------------- +# feet_slide +# --------------------------------------------------------------------------- + + +@wp.kernel +def _feet_slide_kernel( + body_lin_vel_w: wp.array(dtype=wp.vec3f, ndim=2), + net_forces_w: wp.array(dtype=wp.vec3f, ndim=3), + body_ids: wp.array(dtype=wp.int32), + n_history: int, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + s = float(0.0) + for k in range(body_ids.shape[0]): + b = body_ids[k] + # check if in contact: max force norm over history > 1.0 + max_force = float(0.0) + for h in range(n_history): + f = net_forces_w[i, h, b] + f_norm = wp.sqrt(f[0] * f[0] + f[1] * f[1] + f[2] * f[2]) + max_force = wp.max(max_force, f_norm) + in_contact = wp.where(max_force > 1.0, 1.0, 0.0) + # planar velocity norm + vx = body_lin_vel_w[i, b][0] + vy = body_lin_vel_w[i, b][1] + vel_norm = wp.sqrt(vx * vx + vy * vy) + s += vel_norm * in_contact + out[i] = s + + +def feet_slide(env, out, sensor_cfg: SceneEntityCfg, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> None: + """Penalize feet sliding.""" + contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name] + asset = env.scene[asset_cfg.name] + wp.launch( + kernel=_feet_slide_kernel, + dim=env.num_envs, + inputs=[ + asset.data.body_lin_vel_w, + contact_sensor.data.net_forces_w_history, + sensor_cfg.body_ids_wp, + contact_sensor.data.net_forces_w_history.shape[1], + out, + ], + device=env.device, + ) + + +# --------------------------------------------------------------------------- +# track_lin_vel_xy_yaw_frame_exp +# --------------------------------------------------------------------------- + + +@wp.kernel +def _track_lin_vel_xy_yaw_frame_exp_kernel( + root_quat_w: wp.array(dtype=wp.quatf), + root_lin_vel_w: wp.array(dtype=wp.vec3f), + cmd: wp.array(dtype=wp.float32, ndim=2), + inv_std_sq: float, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + q = root_quat_w[i] + # extract yaw-only quaternion + qx = q[0] + qy = q[1] + qz = q[2] + qw = q[3] + sin_yaw = 2.0 * (qw * qz + qx * qy) + cos_yaw = 1.0 - 2.0 * (qy * qy + qz * qz) + yaw_half = wp.atan2(sin_yaw, cos_yaw) * 0.5 + yaw_q = wp.quatf(0.0, 0.0, wp.sin(yaw_half), wp.cos(yaw_half)) + # rotate world velocity into yaw frame (inverse = conjugate for unit quat) + vel_w = root_lin_vel_w[i] + vel_yaw = wp.quat_rotate(wp.quat_inverse(yaw_q), vel_w) + # error + ex = cmd[i, 0] - vel_yaw[0] + ey = cmd[i, 1] - vel_yaw[1] + err_sq = ex * ex + ey * ey + out[i] = wp.exp(-err_sq * inv_std_sq) + + +def track_lin_vel_xy_yaw_frame_exp( + env, out, std: float, command_name: str, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot") +) -> None: + """Reward tracking of linear velocity commands (xy axes) in the gravity aligned robot frame.""" + fn = track_lin_vel_xy_yaw_frame_exp + asset = env.scene[asset_cfg.name] + if not getattr(fn, "_is_warmed_up", False) or fn._cmd_name != command_name: + cmd = env.command_manager.get_command(command_name) + fn._cmd_wp = cmd if isinstance(cmd, wp.array) else wp.from_torch(cmd) + fn._cmd_name = command_name + fn._is_warmed_up = True + wp.launch( + kernel=_track_lin_vel_xy_yaw_frame_exp_kernel, + dim=env.num_envs, + inputs=[asset.data.root_quat_w, asset.data.root_lin_vel_w, fn._cmd_wp, 1.0 / (std * std), out], + device=env.device, + ) + + +# --------------------------------------------------------------------------- +# track_ang_vel_z_world_exp +# --------------------------------------------------------------------------- + + +@wp.kernel +def _track_ang_vel_z_world_exp_kernel( + root_ang_vel_w: wp.array(dtype=wp.vec3f), + cmd: wp.array(dtype=wp.float32, ndim=2), + inv_std_sq: float, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + err = cmd[i, 2] - root_ang_vel_w[i][2] + out[i] = wp.exp(-(err * err) * inv_std_sq) + + +def track_ang_vel_z_world_exp( + env, out, command_name: str, std: float, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot") +) -> None: + """Reward tracking of angular velocity commands (yaw) in world frame.""" + fn = track_ang_vel_z_world_exp + asset = env.scene[asset_cfg.name] + if not getattr(fn, "_is_warmed_up", False) or fn._cmd_name != command_name: + cmd = env.command_manager.get_command(command_name) + fn._cmd_wp = cmd if isinstance(cmd, wp.array) else wp.from_torch(cmd) + fn._cmd_name = command_name + fn._is_warmed_up = True + wp.launch( + kernel=_track_ang_vel_z_world_exp_kernel, + dim=env.num_envs, + inputs=[asset.data.root_ang_vel_w, fn._cmd_wp, 1.0 / (std * std), out], + device=env.device, + ) + + +# --------------------------------------------------------------------------- +# stand_still_joint_deviation_l1 +# --------------------------------------------------------------------------- + + +@wp.kernel +def _stand_still_joint_deviation_l1_kernel( + joint_pos: wp.array(dtype=wp.float32, ndim=2), + default_joint_pos: wp.array(dtype=wp.float32, ndim=2), + cmd: wp.array(dtype=wp.float32, ndim=2), + command_threshold: float, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + n_joints = joint_pos.shape[1] + dev = float(0.0) + for j in range(n_joints): + dev += wp.abs(joint_pos[i, j] - default_joint_pos[i, j]) + # gate: only penalize when command is small + cx = cmd[i, 0] + cy = cmd[i, 1] + cmd_norm = wp.sqrt(cx * cx + cy * cy) + out[i] = wp.where(cmd_norm < command_threshold, dev, 0.0) + + +def stand_still_joint_deviation_l1( + env, out, command_name: str, command_threshold: float = 0.06, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot") +) -> None: + """Penalize offsets from the default joint positions when the command is very small.""" + fn = stand_still_joint_deviation_l1 + asset = env.scene[asset_cfg.name] + if not getattr(fn, "_is_warmed_up", False) or fn._cmd_name != command_name: + cmd = env.command_manager.get_command(command_name) + fn._cmd_wp = cmd if isinstance(cmd, wp.array) else wp.from_torch(cmd) + fn._cmd_name = command_name + fn._is_warmed_up = True + wp.launch( + kernel=_stand_still_joint_deviation_l1_kernel, + dim=env.num_envs, + inputs=[asset.data.joint_pos, asset.data.default_joint_pos, fn._cmd_wp, command_threshold, out], + device=env.device, + ) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/terminations.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/terminations.py new file mode 100644 index 000000000000..d7405098973f --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/mdp/terminations.py @@ -0,0 +1,66 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp-first termination functions for the velocity locomotion environment.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import warp as wp + +from isaaclab.assets import Articulation +from isaaclab.managers import SceneEntityCfg + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +@wp.kernel +def _terrain_out_of_bounds_kernel( + root_pos_w: wp.array(dtype=wp.vec3f), + half_width: float, + half_height: float, + distance_buffer: float, + out: wp.array(dtype=wp.bool), +): + i = wp.tid() + px = wp.abs(root_pos_w[i][0]) + py = wp.abs(root_pos_w[i][1]) + out[i] = px > half_width - distance_buffer or py > half_height - distance_buffer + + +def terrain_out_of_bounds( + env: ManagerBasedRLEnv, out, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot"), distance_buffer: float = 3.0 +) -> None: + """Terminate when the actor moves too close to the edge of the terrain.""" + fn = terrain_out_of_bounds + if not getattr(fn, "_is_warmed_up", False): + terrain_type = env.scene.cfg.terrain.terrain_type + if terrain_type == "plane": + fn._is_plane = True + elif terrain_type == "generator": + fn._is_plane = False + terrain_gen_cfg = env.scene.terrain.cfg.terrain_generator + grid_width, grid_length = terrain_gen_cfg.size + n_rows, n_cols = terrain_gen_cfg.num_rows, terrain_gen_cfg.num_cols + border_width = terrain_gen_cfg.border_width + fn._half_width = 0.5 * (n_rows * grid_width + 2 * border_width) + fn._half_height = 0.5 * (n_cols * grid_length + 2 * border_width) + else: + raise ValueError("Received unsupported terrain type, must be either 'plane' or 'generator'.") + fn._is_warmed_up = True + + if fn._is_plane: + out.zero_() + return + + asset: Articulation = env.scene[asset_cfg.name] + wp.launch( + kernel=_terrain_out_of_bounds_kernel, + dim=env.num_envs, + inputs=[asset.data.root_pos_w, fn._half_width, fn._half_height, distance_buffer, out], + device=env.device, + ) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/velocity_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/velocity_env_cfg.py new file mode 100644 index 000000000000..7442d3654ba3 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/locomotion/velocity/velocity_env_cfg.py @@ -0,0 +1,296 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math +from dataclasses import MISSING + +from isaaclab_experimental.managers import ObservationTermCfg as ObsTerm +from isaaclab_experimental.managers import RewardTermCfg as RewTerm +from isaaclab_experimental.managers import SceneEntityCfg +from isaaclab_experimental.managers import TerminationTermCfg as DoneTerm + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import CurriculumTermCfg as CurrTerm +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sensors import ContactSensorCfg +from isaaclab.terrains import TerrainImporterCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR, ISAACLAB_NUCLEUS_DIR +from isaaclab.utils.noise import UniformNoiseCfg as Unoise + +import isaaclab_tasks_experimental.manager_based.locomotion.velocity.mdp as mdp + +## +# Pre-defined configs +## +from isaaclab.terrains.config.rough import ROUGH_TERRAINS_CFG # isort: skip + + +## +# Scene definition +## + + +@configclass +class MySceneCfg(InteractiveSceneCfg): + """Configuration for the terrain scene with a legged robot.""" + + # ground terrain + terrain = TerrainImporterCfg( + prim_path="/World/ground", + terrain_type="generator", + terrain_generator=ROUGH_TERRAINS_CFG, + max_init_terrain_level=5, + collision_group=-1, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="multiply", + restitution_combine_mode="multiply", + static_friction=1.0, + dynamic_friction=1.0, + ), + visual_material=sim_utils.MdlFileCfg( + mdl_path=f"{ISAACLAB_NUCLEUS_DIR}/Materials/TilesMarbleSpiderWhiteBrickBondHoned/TilesMarbleSpiderWhiteBrickBondHoned.mdl", + project_uvw=True, + texture_scale=(0.25, 0.25), + ), + debug_vis=False, + ) + # robots + robot: ArticulationCfg = MISSING + # sensors + contact_forces = ContactSensorCfg( + prim_path="{ENV_REGEX_NS}/Robot/.*", + filter_prim_paths_expr=[], + history_length=3, + track_air_time=True, + ) + # lights + sky_light = AssetBaseCfg( + prim_path="/World/skyLight", + spawn=sim_utils.DomeLightCfg( + intensity=750.0, + texture_file=f"{ISAAC_NUCLEUS_DIR}/Materials/Textures/Skies/PolyHaven/kloofendal_43d_clear_puresky_4k.hdr", + ), + ) + + +## +# MDP settings +## + + +@configclass +class CommandsCfg: + """Command specifications for the MDP.""" + + base_velocity = mdp.UniformVelocityCommandCfg( + asset_name="robot", + resampling_time_range=(10.0, 10.0), + rel_standing_envs=0.02, + rel_heading_envs=1.0, + heading_command=True, + heading_control_stiffness=0.5, + debug_vis=True, + ranges=mdp.UniformVelocityCommandCfg.Ranges( + lin_vel_x=(-1.0, 1.0), lin_vel_y=(-1.0, 1.0), ang_vel_z=(-1.0, 1.0), heading=(-math.pi, math.pi) + ), + ) + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + joint_pos = mdp.JointPositionActionCfg(asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True) + + +@configclass +class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + base_lin_vel = ObsTerm(func=mdp.base_lin_vel, noise=Unoise(n_min=-0.1, n_max=0.1)) + base_ang_vel = ObsTerm(func=mdp.base_ang_vel, noise=Unoise(n_min=-0.2, n_max=0.2)) + projected_gravity = ObsTerm( + func=mdp.projected_gravity, + noise=Unoise(n_min=-0.05, n_max=0.05), + ) + velocity_commands = ObsTerm(func=mdp.generated_commands, params={"command_name": "base_velocity"}) + joint_pos = ObsTerm(func=mdp.joint_pos_rel, noise=Unoise(n_min=-0.01, n_max=0.01)) + joint_vel = ObsTerm(func=mdp.joint_vel_rel, noise=Unoise(n_min=-1.5, n_max=1.5)) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + # FIXME(warp-migration): COM randomization in exp manager-based locomotion currently causes + # NaNs and is temporarily disabled. + # base_com = EventTerm( + # func=mdp.randomize_rigid_body_com, + # mode="startup", + # params={ + # "asset_cfg": SceneEntityCfg("robot", body_names="base"), + # "com_range": {"x": (-0.05, 0.05), "y": (-0.05, 0.05), "z": (-0.01, 0.01)}, + # }, + # ) + base_com = None + + # reset + base_external_force_torque = EventTerm( + func=mdp.apply_external_force_torque, + mode="reset", + params={ + "asset_cfg": SceneEntityCfg("robot", body_names="base"), + "force_range": (0.0, 0.0), + "torque_range": (-0.0, 0.0), + }, + ) + + reset_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "pose_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5), "yaw": (-3.14, 3.14)}, + "velocity_range": { + "x": (-0.5, 0.5), + "y": (-0.5, 0.5), + "z": (-0.5, 0.5), + "roll": (-0.5, 0.5), + "pitch": (-0.5, 0.5), + "yaw": (-0.5, 0.5), + }, + }, + ) + + reset_robot_joints = EventTerm( + func=mdp.reset_joints_by_scale, + mode="reset", + params={ + "position_range": (0.5, 1.5), + "velocity_range": (0.0, 0.0), + }, + ) + + # interval + push_robot = EventTerm( + func=mdp.push_by_setting_velocity, + mode="interval", + interval_range_s=(10.0, 15.0), + params={"velocity_range": {"x": (-0.5, 0.5), "y": (-0.5, 0.5)}}, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + # -- task + track_lin_vel_xy_exp = RewTerm( + func=mdp.track_lin_vel_xy_exp, weight=1.0, params={"command_name": "base_velocity", "std": math.sqrt(0.25)} + ) + track_ang_vel_z_exp = RewTerm( + func=mdp.track_ang_vel_z_exp, weight=0.5, params={"command_name": "base_velocity", "std": math.sqrt(0.25)} + ) + # -- penalties + lin_vel_z_l2 = RewTerm(func=mdp.lin_vel_z_l2, weight=-2.0) + ang_vel_xy_l2 = RewTerm(func=mdp.ang_vel_xy_l2, weight=-0.05) + dof_torques_l2 = RewTerm(func=mdp.joint_torques_l2, weight=-1.0e-5) + dof_acc_l2 = RewTerm(func=mdp.joint_acc_l2, weight=-2.5e-7) + action_rate_l2 = RewTerm(func=mdp.action_rate_l2, weight=-0.01) + feet_air_time = RewTerm( + func=mdp.feet_air_time, + weight=0.125, + params={ + "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*FOOT"), + "command_name": "base_velocity", + "threshold": 0.5, + }, + ) + undesired_contacts = RewTerm( + func=mdp.undesired_contacts, + weight=-1.0, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*THIGH"), "threshold": 1.0}, + ) + # -- optional penalties + flat_orientation_l2 = RewTerm(func=mdp.flat_orientation_l2, weight=0.0) + dof_pos_limits = RewTerm(func=mdp.joint_pos_limits, weight=0.0) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + base_contact = DoneTerm( + func=mdp.illegal_contact, + params={"sensor_cfg": SceneEntityCfg("contact_forces", body_names="base"), "threshold": 1.0}, + ) + + +@configclass +class CurriculumCfg: + """Curriculum terms for the MDP.""" + + terrain_levels = CurrTerm(func=mdp.terrain_levels_vel) + + +## +# Environment configuration +## + + +@configclass +class LocomotionVelocityRoughEnvCfg(ManagerBasedRLEnvCfg): + """Configuration for the locomotion velocity-tracking environment.""" + + # Scene settings + scene: MySceneCfg = MySceneCfg(num_envs=4096, env_spacing=2.5, replicate_physics=True) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + commands: CommandsCfg = CommandsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + curriculum: CurriculumCfg = CurriculumCfg() + + def __post_init__(self): + """Post initialization.""" + # general settings + self.decimation = 4 + self.episode_length_s = 20.0 + # simulation settings + self.sim.dt = 1.0 / 200.0 + self.sim.render_interval = self.decimation + self.sim.physics_material = self.scene.terrain.physics_material + # update sensor update periods + if self.scene.contact_forces is not None: + self.scene.contact_forces.update_period = self.sim.dt + # check if terrain levels curriculum is enabled + if getattr(self.curriculum, "terrain_levels", None) is not None: + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.curriculum = True + else: + if self.scene.terrain.terrain_generator is not None: + self.scene.terrain.terrain_generator.curriculum = False diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/__init__.py new file mode 100644 index 000000000000..6cd56351b6e5 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from .reach import * diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/__init__.py new file mode 100644 index 000000000000..fe34199f2321 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Reach experimental task registrations (manager-based).""" diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/__init__.py new file mode 100644 index 000000000000..460a30569089 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/franka/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/franka/__init__.py new file mode 100644 index 000000000000..b08612ccc741 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/franka/__init__.py @@ -0,0 +1,41 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym + +# Reuse agent configs from the stable task package. +from isaaclab_tasks.manager_based.manipulation.reach.config.franka import agents + +## +# Register Gym environments. +## + +## +# Joint Position Control +## + +gym.register( + id="Isaac-Reach-Franka-Warp-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:FrankaReachEnvCfg", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FrankaReachPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + }, +) + +gym.register( + id="Isaac-Reach-Franka-Warp-Play-v0", + entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:FrankaReachEnvCfg_PLAY", + "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:FrankaReachPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/franka/joint_pos_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/franka/joint_pos_env_cfg.py new file mode 100644 index 000000000000..5b3c7ec0c2f4 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/franka/joint_pos_env_cfg.py @@ -0,0 +1,74 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + +import isaaclab_tasks_experimental.manager_based.manipulation.reach.mdp as mdp +from isaaclab_tasks_experimental.manager_based.manipulation.reach.reach_env_cfg import ReachEnvCfg + +## +# Pre-defined configs +## +from isaaclab_assets import FRANKA_PANDA_CFG # isort: skip + + +## +# Environment configuration +## + + +@configclass +class FrankaReachEnvCfg(ReachEnvCfg): + sim: SimulationCfg = SimulationCfg( + dt=1 / 60, + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=50, + nconmax=20, + cone="pyramidal", + impratio=1, + integrator="implicitfast", + ), + num_substeps=1, + debug_mode=False, + ), + ) + + def __post_init__(self): + # post init of parent + super().__post_init__() + + # switch robot to franka + self.scene.robot = FRANKA_PANDA_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + # override rewards + self.rewards.end_effector_position_tracking.params["asset_cfg"].body_names = ["panda_hand"] + self.rewards.end_effector_position_tracking_fine_grained.params["asset_cfg"].body_names = ["panda_hand"] + self.rewards.end_effector_orientation_tracking.params["asset_cfg"].body_names = ["panda_hand"] + + # override actions + self.actions.arm_action = mdp.JointPositionActionCfg( + asset_name="robot", joint_names=["panda_joint.*"], scale=0.5, use_default_offset=True + ) + # override command generator body + # end-effector is along z-direction + self.commands.ee_pose.body_name = "panda_hand" + self.commands.ee_pose.ranges.pitch = (math.pi, math.pi) + + +@configclass +class FrankaReachEnvCfg_PLAY(FrankaReachEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/ur_10/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/ur_10/__init__.py new file mode 100644 index 000000000000..85908c158052 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/ur_10/__init__.py @@ -0,0 +1,36 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +# UR10 env disabled: USD asset has composition errors (broken asset file). +# Fails on both torch baseline and warp with: +# RuntimeError: USD stage has composition errors while loading provided stage +# Re-enable once the UR10 USD asset is fixed. + +# import gymnasium as gym +# from isaaclab_tasks.manager_based.manipulation.reach.config.ur_10 import agents + +# gym.register( +# id="Isaac-Reach-UR10-Warp-v0", +# entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", +# disable_env_checker=True, +# kwargs={ +# "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10ReachEnvCfg", +# "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", +# "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg", +# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", +# }, +# ) + +# gym.register( +# id="Isaac-Reach-UR10-Warp-Play-v0", +# entry_point="isaaclab_experimental.envs:ManagerBasedRLEnvWarp", +# disable_env_checker=True, +# kwargs={ +# "env_cfg_entry_point": f"{__name__}.joint_pos_env_cfg:UR10ReachEnvCfg_PLAY", +# "rl_games_cfg_entry_point": f"{agents.__name__}:rl_games_ppo_cfg.yaml", +# "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:UR10ReachPPORunnerCfg", +# "skrl_cfg_entry_point": f"{agents.__name__}:skrl_ppo_cfg.yaml", +# }, +# ) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/ur_10/joint_pos_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/ur_10/joint_pos_env_cfg.py new file mode 100644 index 000000000000..7eddda91f5ac --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/config/ur_10/joint_pos_env_cfg.py @@ -0,0 +1,74 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import math + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +from isaaclab.sim import SimulationCfg +from isaaclab.utils import configclass + +import isaaclab_tasks_experimental.manager_based.manipulation.reach.mdp as mdp +from isaaclab_tasks_experimental.manager_based.manipulation.reach.reach_env_cfg import ReachEnvCfg + +## +# Pre-defined configs +## +from isaaclab_assets import UR10_CFG # isort: skip + + +## +# Environment configuration +## + + +@configclass +class UR10ReachEnvCfg(ReachEnvCfg): + sim: SimulationCfg = SimulationCfg( + physics=NewtonCfg( + solver_cfg=MJWarpSolverCfg( + njmax=50, + nconmax=20, + cone="pyramidal", + impratio=1, + integrator="implicitfast", + ), + num_substeps=1, + debug_mode=False, + ) + ) + + def __post_init__(self): + # post init of parent + super().__post_init__() + + # switch robot to ur10 + self.scene.robot = UR10_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") + # override events + self.events.reset_robot_joints.params["position_range"] = (0.75, 1.25) + # override rewards + self.rewards.end_effector_position_tracking.params["asset_cfg"].body_names = ["ee_link"] + self.rewards.end_effector_position_tracking_fine_grained.params["asset_cfg"].body_names = ["ee_link"] + self.rewards.end_effector_orientation_tracking.params["asset_cfg"].body_names = ["ee_link"] + # override actions + self.actions.arm_action = mdp.JointPositionActionCfg( + asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True + ) + # override command generator body + # end-effector is along x-direction + self.commands.ee_pose.body_name = "ee_link" + self.commands.ee_pose.ranges.pitch = (math.pi / 2, math.pi / 2) + + +@configclass +class UR10ReachEnvCfg_PLAY(UR10ReachEnvCfg): + def __post_init__(self): + # post init of parent + super().__post_init__() + # make a smaller scene for play + self.scene.num_envs = 50 + self.scene.env_spacing = 2.5 + # disable randomization for play + self.observations.policy.enable_corruption = False diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/mdp/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/mdp/__init__.py new file mode 100644 index 000000000000..b0845d6735b6 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/mdp/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""This sub-module contains the functions that are specific to the reach environments.""" + +from isaaclab_experimental.envs.mdp import * # noqa: F401, F403 + +from .rewards import * # noqa: F401, F403 diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/mdp/rewards.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/mdp/rewards.py new file mode 100644 index 000000000000..811163ec9737 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/mdp/rewards.py @@ -0,0 +1,166 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp-first reward terms for the reach task. + +All functions follow the ``func(env, out, **params) -> None`` signature. +Command tensors are cached as zero-copy warp views on first call. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import warp as wp + +from isaaclab.assets import Articulation +from isaaclab.managers import SceneEntityCfg + +if TYPE_CHECKING: + from isaaclab.envs import ManagerBasedRLEnv + + +# --------------------------------------------------------------------------- +# position_command_error +# --------------------------------------------------------------------------- + + +@wp.kernel +def _position_command_error_kernel( + root_pos_w: wp.array(dtype=wp.vec3f), + root_quat_w: wp.array(dtype=wp.quatf), + body_pos_w: wp.array(dtype=wp.vec3f, ndim=2), + cmd: wp.array(dtype=wp.float32, ndim=2), + body_idx: int, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + # desired position in body frame -> world frame + des_b = wp.vec3f(cmd[i, 0], cmd[i, 1], cmd[i, 2]) + des_w = root_pos_w[i] + wp.quat_rotate(root_quat_w[i], des_b) + # current end-effector position + cur_w = body_pos_w[i, body_idx] + dx = cur_w[0] - des_w[0] + dy = cur_w[1] - des_w[1] + dz = cur_w[2] - des_w[2] + out[i] = wp.sqrt(dx * dx + dy * dy + dz * dz) + + +def position_command_error(env: ManagerBasedRLEnv, out, command_name: str, asset_cfg: SceneEntityCfg) -> None: + """Penalize tracking of the position error using L2-norm.""" + fn = position_command_error + asset: Articulation = env.scene[asset_cfg.name] + if not hasattr(fn, "_cmd_wp") or fn._cmd_name != command_name: + cmd = env.command_manager.get_command(command_name) + fn._cmd_wp = cmd if isinstance(cmd, wp.array) else wp.from_torch(cmd) + fn._cmd_name = command_name + wp.launch( + kernel=_position_command_error_kernel, + dim=env.num_envs, + inputs=[ + asset.data.root_pos_w, + asset.data.root_quat_w, + asset.data.body_pos_w, + fn._cmd_wp, + asset_cfg.body_ids[0], + out, + ], + device=env.device, + ) + + +# --------------------------------------------------------------------------- +# position_command_error_tanh +# --------------------------------------------------------------------------- + + +@wp.kernel +def _position_command_error_tanh_kernel( + root_pos_w: wp.array(dtype=wp.vec3f), + root_quat_w: wp.array(dtype=wp.quatf), + body_pos_w: wp.array(dtype=wp.vec3f, ndim=2), + cmd: wp.array(dtype=wp.float32, ndim=2), + body_idx: int, + inv_std: float, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + des_b = wp.vec3f(cmd[i, 0], cmd[i, 1], cmd[i, 2]) + des_w = root_pos_w[i] + wp.quat_rotate(root_quat_w[i], des_b) + cur_w = body_pos_w[i, body_idx] + dx = cur_w[0] - des_w[0] + dy = cur_w[1] - des_w[1] + dz = cur_w[2] - des_w[2] + dist = wp.sqrt(dx * dx + dy * dy + dz * dz) + out[i] = 1.0 - wp.tanh(dist * inv_std) + + +def position_command_error_tanh( + env: ManagerBasedRLEnv, out, std: float, command_name: str, asset_cfg: SceneEntityCfg +) -> None: + """Reward tracking of the position using the tanh kernel.""" + fn = position_command_error_tanh + asset: Articulation = env.scene[asset_cfg.name] + if not hasattr(fn, "_cmd_wp") or fn._cmd_name != command_name: + cmd = env.command_manager.get_command(command_name) + fn._cmd_wp = cmd if isinstance(cmd, wp.array) else wp.from_torch(cmd) + fn._cmd_name = command_name + wp.launch( + kernel=_position_command_error_tanh_kernel, + dim=env.num_envs, + inputs=[ + asset.data.root_pos_w, + asset.data.root_quat_w, + asset.data.body_pos_w, + fn._cmd_wp, + asset_cfg.body_ids[0], + 1.0 / std, + out, + ], + device=env.device, + ) + + +# --------------------------------------------------------------------------- +# orientation_command_error +# --------------------------------------------------------------------------- + + +@wp.kernel +def _orientation_command_error_kernel( + root_quat_w: wp.array(dtype=wp.quatf), + body_quat_w: wp.array(dtype=wp.quatf, ndim=2), + cmd: wp.array(dtype=wp.float32, ndim=2), + body_idx: int, + out: wp.array(dtype=wp.float32), +): + i = wp.tid() + # desired quat in body frame -> world frame: q_des_w = q_root * q_des_b + des_b = wp.quatf(cmd[i, 3], cmd[i, 4], cmd[i, 5], cmd[i, 6]) + des_w = root_quat_w[i] * des_b + # current ee orientation + cur_w = body_quat_w[i, body_idx] + # shortest-path error: angle of q_err = cur^-1 * des + q_err = wp.quat_inverse(cur_w) * des_w + # error magnitude = 2 * acos(|w|) (w component of the error quaternion) + qw = wp.abs(q_err[3]) + qw = wp.clamp(qw, 0.0, 1.0) + out[i] = 2.0 * wp.acos(qw) + + +def orientation_command_error(env: ManagerBasedRLEnv, out, command_name: str, asset_cfg: SceneEntityCfg) -> None: + """Penalize tracking orientation error using shortest path.""" + fn = orientation_command_error + asset: Articulation = env.scene[asset_cfg.name] + if not hasattr(fn, "_cmd_wp") or fn._cmd_name != command_name: + cmd = env.command_manager.get_command(command_name) + fn._cmd_wp = cmd if isinstance(cmd, wp.array) else wp.from_torch(cmd) + fn._cmd_name = command_name + wp.launch( + kernel=_orientation_command_error_kernel, + dim=env.num_envs, + inputs=[asset.data.root_quat_w, asset.data.body_quat_w, fn._cmd_wp, asset_cfg.body_ids[0], out], + device=env.device, + ) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/reach_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/reach_env_cfg.py new file mode 100644 index 000000000000..d019b0531402 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/manager_based/manipulation/reach/reach_env_cfg.py @@ -0,0 +1,206 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from dataclasses import MISSING + +from isaaclab_experimental.managers import ObservationTermCfg as ObsTerm +from isaaclab_experimental.managers import RewardTermCfg as RewTerm +from isaaclab_experimental.managers import SceneEntityCfg +from isaaclab_experimental.managers import TerminationTermCfg as DoneTerm + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg, AssetBaseCfg +from isaaclab.envs import ManagerBasedRLEnvCfg +from isaaclab.managers import ActionTermCfg as ActionTerm +from isaaclab.managers import CurriculumTermCfg as CurrTerm +from isaaclab.managers import EventTermCfg as EventTerm +from isaaclab.managers import ObservationGroupCfg as ObsGroup +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.utils import configclass +from isaaclab.utils.noise import UniformNoiseCfg as Unoise + +import isaaclab_tasks_experimental.manager_based.manipulation.reach.mdp as mdp + +## +# Scene definition +## + + +@configclass +class ReachSceneCfg(InteractiveSceneCfg): + """Configuration for the scene with a robotic arm.""" + + # world + ground = AssetBaseCfg( + prim_path="/World/ground", + spawn=sim_utils.GroundPlaneCfg(), + init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)), + ) + + # robots + robot: ArticulationCfg = MISSING + + # lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0), + ) + + +## +# MDP settings +## + + +@configclass +class CommandsCfg: + """Command terms for the MDP.""" + + ee_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name=MISSING, + resampling_time_range=(4.0, 4.0), + debug_vis=True, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.35, 0.65), + pos_y=(-0.2, 0.2), + pos_z=(0.15, 0.5), + roll=(0.0, 0.0), + pitch=MISSING, # depends on end-effector axis + yaw=(-3.14, 3.14), + ), + ) + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + arm_action: ActionTerm = MISSING + gripper_action: ActionTerm | None = None + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + # observation terms (order preserved) + joint_pos = ObsTerm(func=mdp.joint_pos_rel, noise=Unoise(n_min=-0.01, n_max=0.01)) + joint_vel = ObsTerm(func=mdp.joint_vel_rel, noise=Unoise(n_min=-0.01, n_max=0.01)) + pose_command = ObsTerm(func=mdp.generated_commands, params={"command_name": "ee_pose"}) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + reset_base = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={"pose_range": {}, "velocity_range": {}}, + ) + + reset_robot_joints = EventTerm( + func=mdp.reset_joints_by_scale, + mode="reset", + params={ + "position_range": (0.5, 1.5), + "velocity_range": (0.0, 0.0), + }, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + # task terms + end_effector_position_tracking = RewTerm( + func=mdp.position_command_error, + weight=-0.2, + params={"asset_cfg": SceneEntityCfg("robot", body_names=MISSING), "command_name": "ee_pose"}, + ) + end_effector_position_tracking_fine_grained = RewTerm( + func=mdp.position_command_error_tanh, + weight=0.1, + params={"asset_cfg": SceneEntityCfg("robot", body_names=MISSING), "std": 0.1, "command_name": "ee_pose"}, + ) + end_effector_orientation_tracking = RewTerm( + func=mdp.orientation_command_error, + weight=-0.1, + params={"asset_cfg": SceneEntityCfg("robot", body_names=MISSING), "command_name": "ee_pose"}, + ) + + # action penalty + action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.0001) + joint_vel = RewTerm( + func=mdp.joint_vel_l2, + weight=-0.0001, + params={"asset_cfg": SceneEntityCfg("robot")}, + ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + +@configclass +class CurriculumCfg: + """Curriculum terms for the MDP.""" + + action_rate = CurrTerm( + func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.005, "num_steps": 4500} + ) + + joint_vel = CurrTerm( + func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.001, "num_steps": 4500} + ) + + +## +# Environment configuration +## + + +@configclass +class ReachEnvCfg(ManagerBasedRLEnvCfg): + """Configuration for the reach end-effector pose tracking environment.""" + + # Scene settings + scene: ReachSceneCfg = ReachSceneCfg(num_envs=4096, env_spacing=2.5) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + commands: CommandsCfg = CommandsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + curriculum: CurriculumCfg = CurriculumCfg() + + def __post_init__(self): + """Post initialization.""" + # general settings + self.decimation = 2 + self.sim.render_interval = self.decimation + self.episode_length_s = 12.0 + self.viewer.eye = (3.5, 3.5, 3.5) + # simulation settings + self.sim.dt = 1.0 / 60.0