From c5259f7a4e98cb9266b3a884261eacfe9cc5ee69 Mon Sep 17 00:00:00 2001 From: Jichuan Hu Date: Fri, 27 Feb 2026 01:50:52 -0800 Subject: [PATCH 1/2] Add experimental warp envs and infrastructure from dev/newton Add isaaclab_experimental package with DirectRLEnvWarp base class, InteractiveSceneWarp, and WarpGraphCache utility. Add direct warp environments in isaaclab_tasks_experimental: - Cartpole, Ant, Humanoid, Locomotion (base), InHand Manipulation, Allegro Hand, with agent configs for rsl_rl, rl_games, skrl, sb3. Adapt to develop base class API: - find_joints 2-value return (indices, names) - episode_length_buf as property with in-place copy for warp sync - _ALL_ENV_MASK on base env instead of articulation - set_joint_effort_target_mask for CUDA graph compatibility - _get_observations returns dict for rsl_rl wrapper Align solver configs with stable develop PresetCfg values. Add safe_normalize to guard against NaN from wp.normalize. Fix reset_root to_targets computation to use actual targets. Fix cartpole reset kernel to use parameterized joint indices. Clean up extension.toml and setup.py dependencies. Switch timer import to isaaclab.utils.timer. --- CONTRIBUTORS.md | 1 + .../reinforcement_learning/rl_games/play.py | 3 + .../reinforcement_learning/rl_games/train.py | 3 + scripts/reinforcement_learning/rsl_rl/play.py | 3 + .../reinforcement_learning/rsl_rl/train.py | 3 + scripts/reinforcement_learning/sb3/play.py | 3 + scripts/reinforcement_learning/sb3/train.py | 2 + scripts/reinforcement_learning/skrl/play.py | 3 + scripts/reinforcement_learning/skrl/train.py | 3 + .../config/extension.toml | 21 + .../isaaclab_experimental/__init__.py | 20 + .../isaaclab_experimental/envs/__init__.py | 51 + .../envs/direct_rl_env_warp.py | 832 +++++++++++++++ .../envs/interactive_scene_warp.py | 44 + .../envs/utils/__init__.py | 6 + .../utils/warp_graph_cache.py | 73 ++ source/isaaclab_experimental/pyproject.toml | 3 + source/isaaclab_experimental/setup.py | 44 + .../isaaclab_rl/rsl_rl/vecenv_wrapper.py | 11 +- .../config/extension.toml | 22 + .../docs/README.md | 3 + .../isaaclab_tasks_experimental/__init__.py | 32 + .../direct/__init__.py | 10 + .../direct/allegro_hand/__init__.py | 29 + .../allegro_hand/allegro_hand_warp_env_cfg.py | 136 +++ .../direct/ant/__init__.py | 28 + .../direct/ant/ant_env_warp.py | 91 ++ .../direct/cartpole/__init__.py | 29 + .../direct/cartpole/cartpole_warp_env.py | 357 +++++++ .../direct/humanoid/__init__.py | 28 + .../direct/humanoid/humanoid_warp_env.py | 114 ++ .../direct/inhand_manipulation/__init__.py | 4 + .../inhand_manipulation_warp_env.py | 980 ++++++++++++++++++ .../direct/locomotion/__init__.py | 4 + .../direct/locomotion/locomotion_env_warp.py | 575 ++++++++++ .../pyproject.toml | 3 + source/isaaclab_tasks_experimental/setup.py | 40 + 37 files changed, 3613 insertions(+), 1 deletion(-) create mode 100644 source/isaaclab_experimental/config/extension.toml create mode 100644 source/isaaclab_experimental/isaaclab_experimental/__init__.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py create mode 100644 source/isaaclab_experimental/pyproject.toml create mode 100644 source/isaaclab_experimental/setup.py create mode 100644 source/isaaclab_tasks_experimental/config/extension.toml create mode 100644 source/isaaclab_tasks_experimental/docs/README.md create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/ant_env_warp.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/cartpole_warp_env.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/humanoid_warp_env.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/locomotion_env_warp.py create mode 100644 source/isaaclab_tasks_experimental/pyproject.toml create mode 100644 source/isaaclab_tasks_experimental/setup.py diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 201c70bf727e..c67997cf40be 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -93,6 +93,7 @@ Guidelines for modifications: * Ji Yuan Feng * Jia Lin Yuan * Jiakai Zhang +* Jichuan Hu * Jinghuan Shang * Jingzhou Liu * Jinqi Wei diff --git a/scripts/reinforcement_learning/rl_games/play.py b/scripts/reinforcement_learning/rl_games/play.py index eb2390af90d6..dd61d80da530 100644 --- a/scripts/reinforcement_learning/rl_games/play.py +++ b/scripts/reinforcement_learning/rl_games/play.py @@ -6,6 +6,7 @@ """Script to play a checkpoint if an RL agent from RL-Games.""" import argparse +import contextlib import math import os import random @@ -29,6 +30,8 @@ from isaaclab_tasks.utils import add_launcher_args, get_checkpoint_path, launch_simulation, resolve_task_config # PLACEHOLDER: Extension template (do not remove this comment) +with contextlib.suppress(ImportError): + import isaaclab_tasks_experimental # noqa: F401 # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from RL-Games.") diff --git a/scripts/reinforcement_learning/rl_games/train.py b/scripts/reinforcement_learning/rl_games/train.py index 5ad13b401bb3..4de23c19246a 100644 --- a/scripts/reinforcement_learning/rl_games/train.py +++ b/scripts/reinforcement_learning/rl_games/train.py @@ -6,6 +6,7 @@ """Script to train RL agent with RL-Games.""" import argparse +import contextlib import logging import math import os @@ -33,6 +34,8 @@ logger = logging.getLogger(__name__) # PLACEHOLDER: Extension template (do not remove this comment) +with contextlib.suppress(ImportError): + import isaaclab_tasks_experimental # noqa: F401 # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.") diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py index f790f627a22a..cb60206bdf99 100644 --- a/scripts/reinforcement_learning/rsl_rl/play.py +++ b/scripts/reinforcement_learning/rsl_rl/play.py @@ -6,6 +6,7 @@ """Script to play a checkpoint if an RL agent from RSL-RL.""" import argparse +import contextlib import importlib.metadata as metadata import os import sys @@ -37,6 +38,8 @@ import cli_args # isort: skip # PLACEHOLDER: Extension template (do not remove this comment) +with contextlib.suppress(ImportError): + import isaaclab_tasks_experimental # noqa: F401 # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.") diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index 7ca2d3156da5..9c4c95da2309 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -6,6 +6,7 @@ """Script to train RL agent with RSL-RL.""" import argparse +import contextlib import importlib.metadata as metadata import logging import os @@ -35,6 +36,8 @@ logger = logging.getLogger(__name__) # PLACEHOLDER: Extension template (do not remove this comment) +with contextlib.suppress(ImportError): + import isaaclab_tasks_experimental # noqa: F401 RSL_RL_VERSION = "3.0.1" diff --git a/scripts/reinforcement_learning/sb3/play.py b/scripts/reinforcement_learning/sb3/play.py index a6f222d346c1..a1f8757a1e8c 100644 --- a/scripts/reinforcement_learning/sb3/play.py +++ b/scripts/reinforcement_learning/sb3/play.py @@ -6,6 +6,7 @@ """Script to play a checkpoint if an RL agent from Stable-Baselines3.""" import argparse +import contextlib import os import random import sys @@ -27,6 +28,8 @@ from isaaclab_tasks.utils import add_launcher_args, get_checkpoint_path, launch_simulation, resolve_task_config # PLACEHOLDER: Extension template (do not remove this comment) +with contextlib.suppress(ImportError): + import isaaclab_tasks_experimental # noqa: F401 # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from Stable-Baselines3.") diff --git a/scripts/reinforcement_learning/sb3/train.py b/scripts/reinforcement_learning/sb3/train.py index bd79599d1fda..7bf757ef5483 100644 --- a/scripts/reinforcement_learning/sb3/train.py +++ b/scripts/reinforcement_learning/sb3/train.py @@ -35,6 +35,8 @@ logger = logging.getLogger(__name__) # PLACEHOLDER: Extension template (do not remove this comment) +with contextlib.suppress(ImportError): + import isaaclab_tasks_experimental # noqa: F401 # -- argparse ---------------------------------------------------------------- parser = argparse.ArgumentParser(description="Train an RL agent with Stable-Baselines3.") diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py index 0349d405967e..3857c095150b 100644 --- a/scripts/reinforcement_learning/skrl/play.py +++ b/scripts/reinforcement_learning/skrl/play.py @@ -11,6 +11,7 @@ """ import argparse +import contextlib import os import random import sys @@ -30,6 +31,8 @@ from isaaclab_tasks.utils import add_launcher_args, get_checkpoint_path, launch_simulation, resolve_task_config # PLACEHOLDER: Extension template (do not remove this comment) +with contextlib.suppress(ImportError): + import isaaclab_tasks_experimental # noqa: F401 SKRL_VERSION = "1.4.3" diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py index 750ebeb8798d..edb124381647 100644 --- a/scripts/reinforcement_learning/skrl/train.py +++ b/scripts/reinforcement_learning/skrl/train.py @@ -11,6 +11,7 @@ """ import argparse +import contextlib import logging import os import random @@ -35,6 +36,8 @@ logger = logging.getLogger(__name__) # PLACEHOLDER: Extension template (do not remove this comment) +with contextlib.suppress(ImportError): + import isaaclab_tasks_experimental # noqa: F401 SKRL_VERSION = "1.4.3" diff --git a/source/isaaclab_experimental/config/extension.toml b/source/isaaclab_experimental/config/extension.toml new file mode 100644 index 000000000000..c1154edbe439 --- /dev/null +++ b/source/isaaclab_experimental/config/extension.toml @@ -0,0 +1,21 @@ +[package] + +# Note: Semantic Versioning is used: https://semver.org/ +version = "0.0.1" + +# Description +title = "Experimental playground for upcoming IsaacLab features" +description="Provides early access to future features that are not yet `production-ready`." +readme = "docs/README.md" +repository = "https://github.com/isaac-sim/IsaacLab" +category = "robotics" +keywords = ["robotics", "simulation", "experimental"] + +[dependencies] +"isaaclab" = {} + +[core] +reloadable = false + +[[python.module]] +name = "isaaclab_experimental" diff --git a/source/isaaclab_experimental/isaaclab_experimental/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/__init__.py new file mode 100644 index 000000000000..18e97e2dd188 --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Package containing the core framework.""" + +import os +import toml +from enum import IntEnum + +# Conveniences to other module directories via relative paths +ISAACLAB_EXPERIMENTAL_EXT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../")) +"""Path to the extension source directory.""" + +ISAACLAB_EXPERIMENTAL_METADATA = toml.load(os.path.join(ISAACLAB_EXPERIMENTAL_EXT_DIR, "config", "extension.toml")) +"""Extension metadata dictionary parsed from the extension.toml file.""" + +# Configure the module-level variables +__version__ = ISAACLAB_EXPERIMENTAL_METADATA["package"]["version"] diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py new file mode 100644 index 000000000000..81c59dda7d51 --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py @@ -0,0 +1,51 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Sub-package for environment definitions. + +Environments define the interface between the agent and the simulation. +In the simplest case, the environment provides the agent with the current +observations and executes the actions provided by the agent. However, the +environment can also provide additional information such as the current +reward, done flag, and information about the current episode. + +There are two types of environment designing workflows: + +* **Manager-based**: The environment is decomposed into individual components (or managers) + for different aspects (such as computing observations, applying actions, and applying + randomization. The users mainly configure the managers and the environment coordinates the + managers and calls their functions. +* **Direct**: The user implements all the necessary functionality directly into a single class + directly without the need for additional managers. + +Based on these workflows, there are the following environment classes for single and multi-agent RL: + +**Single-Agent RL:** + +* :class:`ManagerBasedEnv`: The manager-based workflow base environment which only provides the + agent with the current observations and executes the actions provided by the agent. +* :class:`ManagerBasedRLEnv`: The manager-based workflow RL task environment which besides the + functionality of the base environment also provides additional Markov Decision Process (MDP) + related information such as the current reward, done flag, and information. +* :class:`DirectRLEnv`: The direct workflow RL task environment which provides implementations for + implementing scene setup, computing dones, performing resets, and computing reward and observation. + +**Multi-Agent RL (MARL):** + +* :class:`DirectMARLEnv`: The direct workflow MARL task environment which provides implementations for + implementing scene setup, computing dones, performing resets, and computing reward and observation. + +For more information about the workflow design patterns, see the `Task Design Workflows`_ section. + +.. _`Task Design Workflows`: https://isaac-sim.github.io/IsaacLab/source/features/task_workflows.html +""" + +from .direct_rl_env_warp import DirectRLEnvWarp # noqa: F401 +from .interactive_scene_warp import InteractiveSceneWarp # noqa: F401 + +__all__ = [ + "DirectRLEnvWarp", + "InteractiveSceneWarp", +] diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py b/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py new file mode 100644 index 000000000000..9bb0b7ed3718 --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py @@ -0,0 +1,832 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import contextlib +import inspect +import logging +import math +import os +import weakref +from abc import abstractmethod +from dataclasses import MISSING +from typing import Any, ClassVar + +import gymnasium as gym +import numpy as np +import torch + +# import omni.kit.app +# import omni.log +# import omni.physx +import warp as wp + +from isaaclab.envs.common import VecEnvObs, VecEnvStepReturn +from isaaclab.envs.direct_rl_env_cfg import DirectRLEnvCfg +from isaaclab.envs.utils.spaces import sample_space, spec_to_gym_space + +# from isaaclab.envs.ui import ViewportCameraController +from isaaclab.managers import EventManager +from isaaclab.sim import SimulationContext +from isaaclab.sim.utils import use_stage +from isaaclab.utils.noise import NoiseModel +from isaaclab.utils.seed import configure_seed +from isaaclab.utils.timer import Timer + +from isaaclab_experimental.envs.interactive_scene_warp import InteractiveSceneWarp +from isaaclab_experimental.utils.warp_graph_cache import WarpGraphCache + +# from isaacsim.core.simulation_manager import SimulationManager +# from isaacsim.core.version import get_version + + +# import logger +logger = logging.getLogger(__name__) + +DEBUG_TIMER_STEP = os.environ.get("DEBUG_TIMER_STEP", "0") == "1" +"""Enable outer step() timer only. Set DEBUG_TIMER_STEP=1 env var to enable.""" + +DEBUG_TIMERS = os.environ.get("DEBUG_TIMERS", "0") == "1" +"""Enable all fine-grained inner timers (adds wp.synchronize per sub-phase). Set DEBUG_TIMERS=1 env var to enable.""" + + +@wp.kernel +def zero_mask_int32( + mask: wp.array(dtype=wp.bool), + data: wp.array(dtype=wp.int32), +): + env_index = wp.tid() + if mask[env_index]: + data[env_index] = 0 + + +@wp.kernel +def add_to_env( + data: wp.array(dtype=wp.int32), + value: wp.int32, +): + env_index = wp.tid() + data[env_index] += value + + +class DirectRLEnvWarp(gym.Env): + """The superclass for the direct workflow to design environments. + + This class implements the core functionality for reinforcement learning (RL) + environments. It is designed to be used with any RL library. The class is designed + to be used with vectorized environments, i.e., the environment is expected to be run + in parallel with multiple sub-environments. + + While the environment itself is implemented as a vectorized environment, we do not + inherit from :class:`gym.vector.VectorEnv`. This is mainly because the class adds + various methods (for wait and asynchronous updates) which are not required. + Additionally, each RL library typically has its own definition for a vectorized + environment. Thus, to reduce complexity, we directly use the :class:`gym.Env` over + here and leave it up to library-defined wrappers to take care of wrapping this + environment for their agents. + + Note: + For vectorized environments, it is recommended to **only** call the :meth:`reset` + method once before the first call to :meth:`step`, i.e. after the environment is created. + After that, the :meth:`step` function handles the reset of terminated sub-environments. + in a vectorized environment. + + """ + + is_vector_env: ClassVar[bool] = True + """Whether the environment is a vectorized environment.""" + metadata: ClassVar[dict[str, Any]] = { + "render_modes": [None, "human", "rgb_array"], + # "isaac_sim_version": get_version(), + } + """Metadata for the environment.""" + + def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs): + """Initialize the environment. + + Args: + cfg: The configuration object for the environment. + render_mode: The render mode for the environment. Defaults to None, which + is similar to ``"human"``. + + Raises: + RuntimeError: If a simulation context already exists. The environment must always create one + since it configures the simulation context and controls the simulation. + """ + # check that the config is valid + cfg.validate() + # store inputs to class + self.cfg = cfg + # store the render mode + self.render_mode = render_mode + # initialize internal variables + self._is_closed = False + + # set the seed for the environment + if self.cfg.seed is not None: + self.cfg.seed = self.seed(self.cfg.seed) + else: + logger.warning("Seed not set for the environment. The environment creation may not be deterministic.") + + # create a simulation context to control the simulator + if SimulationContext.instance() is None: + self.sim: SimulationContext = SimulationContext(self.cfg.sim) + else: + raise RuntimeError("Simulation context already exists. Cannot create a new one.") + + # make sure torch is running on the correct device + if "cuda" in self.device: + torch.cuda.set_device(self.device) + + # print useful information + print("[INFO]: Base environment:") + print(f"\tEnvironment device : {self.device}") + print(f"\tEnvironment seed : {self.cfg.seed}") + print(f"\tPhysics step-size : {self.physics_dt}") + print(f"\tRendering step-size : {self.physics_dt * self.cfg.sim.render_interval}") + print(f"\tEnvironment step-size : {self.step_dt}") + + if self.cfg.sim.render_interval < self.cfg.decimation: + msg = ( + f"The render interval ({self.cfg.sim.render_interval}) is smaller than the decimation " + f"({self.cfg.decimation}). Multiple render calls will happen for each environment step." + "If this is not intended, set the render interval to be equal to the decimation." + ) + logger.warning(msg) + + # generate scene + with Timer("[INFO]: Time taken for scene creation", "scene_creation"): + # set the stage context for scene creation steps which use the stage + with use_stage(self.sim.stage): + self.scene = InteractiveSceneWarp(self.cfg.scene) + self._setup_scene() + # attach_stage_to_usd_context() + print("[INFO]: Scene manager: ", self.scene) + + # set up camera viewport controller + # viewport is not available in other rendering modes so the function will throw a warning + # FIXME: This needs to be fixed in the future when we unify the UI functionalities even for + # non-rendering modes. + has_gui = bool(self.sim.get_setting("/isaaclab/has_gui")) + offscreen_render = bool(self.sim.get_setting("/isaaclab/render/offscreen")) + if has_gui or offscreen_render: + # self.viewport_camera_controller = ViewportCameraController(self, self.cfg.viewer) + self.viewport_camera_controller = None + else: + self.viewport_camera_controller = None + + # create event manager + # note: this is needed here (rather than after simulation play) to allow USD-related randomization events + # that must happen before the simulation starts. Example: randomizing mesh scale + if self.cfg.events: + self.event_manager = EventManager(self.cfg.events, self) + + # apply USD-related randomization events + if "prestartup" in self.event_manager.available_modes: + self.event_manager.apply(mode="prestartup") + + # play the simulator to activate physics handles + # note: this activates the physics simulation view that exposes TensorAPIs + # note: when started in extension mode, first call sim.reset_async() and then initialize the managers + # if builtins.ISAAC_LAUNCHED_FROM_TERMINAL is False: + # print("[INFO]: Starting the simulation. This may take a few seconds. Please wait...") + with Timer("[INFO]: Time taken for simulation start", "simulation_start"): + # since the reset can trigger callbacks which use the stage, + # we need to set the stage context here + with use_stage(self.sim.stage): + self.sim.reset() + # update scene to pre populate data buffers for assets and sensors. + # this is needed for the observation manager to get valid tensors for initialization. + # this shouldn't cause an issue since later on, users do a reset over all the + # environments so the lazy buffers would be reset. + self.scene.update(dt=self.physics_dt) + + # check if debug visualization is has been implemented by the environment + source_code = inspect.getsource(self._set_debug_vis_impl) + self.has_debug_vis_implementation = "NotImplementedError" not in source_code + self._debug_vis_handle = None + + # extend UI elements + # we need to do this here after all the managers are initialized + # this is because they dictate the sensors and commands right now + if bool(self.sim.settings.get("/isaaclab/visualizer")) and self.cfg.ui_window_class_type is not None: + self._window = self.cfg.ui_window_class_type(self, window_name="IsaacLab") + else: + # if no window, then we don't need to store the window + self._window = None + + # allocate dictionary to store metrics + self.extras = {} + + # initialize data and constants + # -- counter for simulation steps + self._sim_step_counter = 0 + # -- counter for curriculum + self.common_step_counter = 0 + # -- init buffers + self._episode_length_buf_wp = wp.zeros(self.num_envs, dtype=wp.int32, device=self.device) + self._episode_length_buf_torch = wp.to_torch(self._episode_length_buf_wp) + self.reset_terminated = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device) + self.reset_time_outs = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device) + self.reset_buf = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device) + self._ALL_ENV_MASK = wp.ones(self.num_envs, dtype=wp.bool, device=self.device) + + # Expected bindings: + self.torch_obs_buf: torch.Tensor = None + self.torch_reward_buf: torch.Tensor = None + self.torch_reset_terminated: torch.Tensor = None + self.torch_reset_time_outs: torch.Tensor = None + self.torch_episode_length_buf: torch.Tensor = None + + # Warp CUDA graph cache for capture-or-replay + self._graph_cache = WarpGraphCache() + + # setup the action and observation spaces for Gym + self._configure_gym_env_spaces() + + # setup noise cfg for adding action and observation noise + if self.cfg.action_noise_model: + self._action_noise_model: NoiseModel = self.cfg.action_noise_model.class_type( + self.cfg.action_noise_model, num_envs=self.num_envs, device=self.device + ) + if self.cfg.observation_noise_model: + self._observation_noise_model: NoiseModel = self.cfg.observation_noise_model.class_type( + self.cfg.observation_noise_model, num_envs=self.num_envs, device=self.device + ) + + # perform events at the start of the simulation + if self.cfg.events: + # we print it here to make the logging consistent + print("[INFO] Event Manager: ", self.event_manager) + + if "startup" in self.event_manager.available_modes: + self.event_manager.apply(mode="startup") + + # set the framerate of the gym video recorder wrapper so that the playback speed of the produced + # video matches the simulation + self.metadata["render_fps"] = 1 / self.step_dt + + # print the environment information + print("[INFO]: Completed setting up the environment...") + + def __del__(self): + """Cleanup for the environment.""" + # Suppress errors during Python shutdown to avoid noisy tracebacks + # Note: contextlib may be None during interpreter shutdown + if contextlib is not None: + with contextlib.suppress(ImportError, AttributeError, TypeError): + self.close() + + """ + Properties. + """ + + @property + def num_envs(self) -> int: + """The number of instances of the environment that are running.""" + return self.scene.num_envs + + @property + def physics_dt(self) -> float: + """The physics time-step (in s). + + This is the lowest time-decimation at which the simulation is happening. + """ + return self.cfg.sim.dt + + @property + def step_dt(self) -> float: + """The environment stepping time-step (in s). + + This is the time-step at which the environment steps forward. + """ + return self.cfg.sim.dt * self.cfg.decimation + + @property + def device(self): + """The device on which the environment is running.""" + return self.sim.device + + @property + def max_episode_length_s(self) -> float: + """Maximum episode length in seconds.""" + return self.cfg.episode_length_s + + @property + def max_episode_length(self): + """The maximum episode length in steps adjusted from s.""" + return math.ceil(self.max_episode_length_s / (self.cfg.sim.dt * self.cfg.decimation)) + + @property + def episode_length_buf(self) -> torch.Tensor: + """The episode length buffer as a torch tensor. + + This is a view of the underlying warp array ``_episode_length_buf_wp``. + Setting this property copies values in-place to preserve the shared + memory link with warp kernels. + """ + return self._episode_length_buf_torch + + @episode_length_buf.setter + def episode_length_buf(self, value: torch.Tensor): + self._episode_length_buf_torch.copy_(value) + + """ + Operations. + """ + + def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[VecEnvObs, dict]: + """Resets all the environments and returns observations. + + This function calls the :meth:`_reset_idx` function to reset all the environments. + However, certain operations, such as procedural terrain generation, that happened during initialization + are not repeated. + + Args: + seed: The seed to use for randomization. Defaults to None, in which case the seed is not set. + options: Additional information to specify how the environment is reset. Defaults to None. + + Note: + This argument is used for compatibility with Gymnasium environment definition. + + Returns: + A tuple containing the observations and extras. + """ + # set the seed + if seed is not None: + self.seed(seed) + + # reset state of scene + self._reset_idx(self._ALL_ENV_MASK) + + # update articulation kinematics + self.scene.write_data_to_sim() + + # if sensors are added to the scene, make sure we render to reflect changes in reset + if hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset: + self.sim.render() + + # if self.cfg.wait_for_textures and self.sim.has_rtx_sensors(): + # while SimulationManager.assets_loading(): + # self.sim.render() + + # return observations + self._get_observations() + return {"policy": self.torch_obs_buf.clone()}, self.extras + + @Timer(name="env_step", msg="Step took:", enable=DEBUG_TIMER_STEP or DEBUG_TIMERS) + def step(self, action: torch.Tensor) -> VecEnvStepReturn: + """Execute one time-step of the environment's dynamics. + + The environment steps forward at a fixed time-step, while the physics simulation is decimated at a + lower time-step. This is to ensure that the simulation is stable. These two time-steps can be configured + independently using the :attr:`DirectRLEnvCfg.decimation` (number of simulation steps per environment step) + and the :attr:`DirectRLEnvCfg.sim.physics_dt` (physics time-step). Based on these parameters, the environment + time-step is computed as the product of the two. + + This function performs the following steps: + + 1. Pre-process the actions before stepping through the physics. + 2. Apply the actions to the simulator and step through the physics in a decimated manner. + 3. Compute the reward and done signals. + 4. Reset environments that have terminated or reached the maximum episode length. + 5. Apply interval events if they are enabled. + 6. Compute observations. + + Args: + action: The actions to apply on the environment. Shape is (num_envs, action_dim). + + Returns: + A tuple containing the observations, rewards, resets (terminated and truncated) and extras. + """ + + action = action.to(self.device) + # add action noise + if self.cfg.action_noise_model: + action = self._action_noise_model(action) + + # process actions, #TODO pass the torch tensor directly. + with Timer(name="pre_physics", msg="Pre-physics step took:", enable=DEBUG_TIMERS): + self._pre_physics_step( + wp.from_torch(action) + ) # Creates a tensor and discards it. Not graphable unless training loop reuses the same pointer. + + # check if we need to do rendering within the physics loop + # note: checked here once to avoid multiple checks within the loop + _has_rtx = hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors() + is_rendering = bool(self.sim.settings.get("/isaaclab/visualizer")) or _has_rtx + + # perform physics stepping + with Timer(name="physics_loop", msg="Physics loop took:", enable=DEBUG_TIMERS): + for _ in range(self.cfg.decimation): + self._sim_step_counter += 1 + # set actions into buffers + # simulate + with Timer(name="apply_action", msg="Action processing step took:", enable=DEBUG_TIMERS): + self._graph_cache.capture_or_replay("action", self.step_warp_action) + + # write_data_to_sim runs outside the CUDA graph because _apply_actuator_model + # uses torch ops (wp.to_torch + torch arithmetic) that cross CUDA streams. + with Timer(name="write_data_to_sim_loop", msg="Write data to sim (loop) took:", enable=DEBUG_TIMERS): + self.scene.write_data_to_sim() + + with Timer(name="simulate", msg="Newton simulation step took:", enable=DEBUG_TIMERS): + self.sim.step(render=False) + # render between steps only if the GUI or an RTX sensor needs it + # note: we assume the render interval to be the shortest accepted rendering interval. + # If a camera needs rendering at a faster frequency, this will lead to unexpected behavior. + if self._sim_step_counter % self.cfg.sim.render_interval == 0 and is_rendering: + self.sim.render() + # update buffers at sim dt + with Timer(name="scene_update", msg="Scene update took:", enable=DEBUG_TIMERS): + self.scene.update(dt=self.physics_dt) + + self.common_step_counter += 1 # total step (common for all envs) + with Timer(name="end_pre_graph", msg="End pre-graph took:", enable=DEBUG_TIMERS): + self._graph_cache.capture_or_replay("end_pre", self._step_warp_end_pre) + # write_data_to_sim runs uncaptured — it uses torch ops that cross CUDA streams. + with Timer(name="write_data_to_sim_post", msg="Write data to sim (post-reset) took:", enable=DEBUG_TIMERS): + self.scene.write_data_to_sim() + with Timer(name="end_post_graph", msg="End post-graph took:", enable=DEBUG_TIMERS): + self._graph_cache.capture_or_replay("end_post", self._step_warp_end_post) + + # Visualization hook — runs after CUDA graph scope. Override in subclass + # to update markers or other non-graphable visual elements. + with Timer(name="visualize", msg="Visualize took:", enable=DEBUG_TIMERS): + self._post_step_visualize() + + # return observations, rewards, resets and extras + return ( + {"policy": self.torch_obs_buf.clone()}, + self.torch_reward_buf, + self.torch_reset_terminated, + self.torch_reset_time_outs, + self.extras, + ) + + def _post_step_visualize(self) -> None: + """Hook for updating visualization markers after CUDA graph scope. + + Override in subclass to update markers or other non-graphable visual + elements (e.g., those requiring wp.to_torch + .cpu().numpy()). + This runs every step, outside any CUDA graph capture. + """ + pass + + def step_warp_action(self) -> None: + self._apply_action() + # Note: scene.write_data_to_sim() is called separately outside the CUDA graph + # capture scope because it invokes _apply_actuator_model() which uses torch + # arithmetic (wp.to_torch + torch ops). This would cause a CUDA stream crossing + # error during graph capture. Moving it outside is safe since it runs every step. + + def _step_warp_end_pre(self) -> None: + """Capturable portion before write_data_to_sim (pure warp kernels).""" + wp.launch( + add_to_env, + dim=self.num_envs, + inputs=[ + self._episode_length_buf_wp, + 1, + ], + ) + self._get_dones() + self._get_rewards() + + # -- reset envs that terminated/timed-out and log the episode information + self._reset_idx(mask=self.reset_buf) + + def _step_warp_end_post(self) -> None: + """Capturable portion after write_data_to_sim (pure warp kernels).""" + # if sensors are added to the scene, make sure we render to reflect changes in reset + # if self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset: + # self.sim.render() + + # TODO We could split it out. + # post-step: step interval event + # if self.cfg.events: + # if "interval" in self.event_manager.available_modes: + # self.event_manager.apply(mode="interval", dt=self.step_dt) + + # update observations + self._get_observations() + + # add observation noise + # note: we apply no noise to the state space (since it is used for critic networks) + # if self.cfg.observation_noise_model: + # self.obs_buf["policy"] = self._observation_noise_model(self.obs_buf["policy"]) + + @staticmethod + def seed(seed: int = -1) -> int: + """Set the seed for the environment. + + Args: + seed: The seed for random generator. Defaults to -1. + + Returns: + The seed used for random generator. + """ + # set seed for replicator + try: + import omni.replicator.core as rep + + rep.set_global_seed(seed) + except ModuleNotFoundError: + pass + # set seed for torch and other libraries + return configure_seed(seed) + + def render(self, recompute: bool = False) -> np.ndarray | None: + """Run rendering without stepping through the physics. + + By convention, if mode is: + + - **human**: Render to the current display and return nothing. Usually for human consumption. + - **rgb_array**: Return an numpy.ndarray with shape (x, y, 3), representing RGB values for an + x-by-y pixel image, suitable for turning into a video. + + Args: + recompute: Whether to force a render even if the simulator has already rendered the scene. + Defaults to False. + + Returns: + The rendered image as a numpy array if mode is "rgb_array". Otherwise, returns None. + + Raises: + RuntimeError: If mode is set to "rgb_data" and simulation render mode does not support it. + In this case, the simulation render mode must be set to ``RenderMode.PARTIAL_RENDERING`` + or ``RenderMode.FULL_RENDERING``. + NotImplementedError: If an unsupported rendering mode is specified. + """ + # run a rendering step of the simulator + # if we have rtx sensors, we do not need to render again sim + if not (hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors()) and not recompute: + self.sim.render() + # decide the rendering mode + if self.render_mode == "human" or self.render_mode is None: + return None + elif self.render_mode == "rgb_array": + # check that if any render could have happened + has_gui = bool(self.sim.get_setting("/isaaclab/has_gui")) + offscreen_render = bool(self.sim.get_setting("/isaaclab/render/offscreen")) + # Rendering is possible if we have GUI or offscreen rendering enabled + can_render = has_gui or offscreen_render + + if not can_render: + render_mode_name = "NO_GUI_OR_RENDERING" + raise RuntimeError( + f"Cannot render '{self.render_mode}' when the simulation render mode is" + f" '{render_mode_name}'. Please set the simulation render mode" + " to:'PARTIAL_RENDERING' or" + " 'FULL_RENDERING'. If running headless, make" + " sure --enable_cameras is set." + ) + # create the annotator if it does not exist + if not hasattr(self, "_rgb_annotator"): + import omni.replicator.core as rep + + # create render product + self._render_product = rep.create.render_product( + self.cfg.viewer.cam_prim_path, self.cfg.viewer.resolution + ) + # create rgb annotator -- used to read data from the render product + self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") + self._rgb_annotator.attach([self._render_product]) + # obtain the rgb data + rgb_data = self._rgb_annotator.get_data() + # convert to numpy array + rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) + # return the rgb data + # note: initially the renerer is warming up and returns empty data + if rgb_data.size == 0: + return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8) + else: + return rgb_data[:, :, :3] + else: + raise NotImplementedError( + f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." + ) + + def close(self): + """Cleanup for the environment.""" + if not self._is_closed: + # close entities related to the environment + # note: this is order-sensitive to avoid any dangling references + if self.cfg.events: + del self.event_manager + del self.scene + if self.viewport_camera_controller is not None: + del self.viewport_camera_controller + + # # clear callbacks and instance + # if float(".".join(get_version()[2])) >= 5: + # if self.cfg.sim.create_stage_in_memory: + # # detach physx stage + # omni.physx.get_physx_simulation_interface().detach_stage() + # self.sim.stop() + # self.sim.clear() + + # self.sim.clear_all_callbacks() + self.sim.clear_instance() + + # destroy the window + if self._window is not None: + self._window = None + # update closing status + self._is_closed = True + + """ + Operations - Debug Visualization. + """ + + def set_debug_vis(self, debug_vis: bool) -> bool: + """Toggles the environment debug visualization. + + Args: + debug_vis: Whether to visualize the environment debug visualization. + + Returns: + Whether the debug visualization was successfully set. False if the environment + does not support debug visualization. + """ + # check if debug visualization is supported + if not self.has_debug_vis_implementation: + return False + # toggle debug visualization objects + self._set_debug_vis_impl(debug_vis) + # toggle debug visualization handles + if debug_vis: + import omni.kit.app + + # create a subscriber for the post update event if it doesn't exist + if self._debug_vis_handle is None: + app_interface = omni.kit.app.get_app_interface() + self._debug_vis_handle = app_interface.get_post_update_event_stream().create_subscription_to_pop( + lambda event, obj=weakref.proxy(self): obj._debug_vis_callback(event) + ) + else: + # remove the subscriber if it exists + if self._debug_vis_handle is not None: + self._debug_vis_handle.unsubscribe() + self._debug_vis_handle = None + # return success + return True + + """ + Helper functions. + """ + + def _configure_gym_env_spaces(self): + """Configure the action and observation spaces for the Gym environment.""" + # show deprecation message and overwrite configuration + if self.cfg.num_actions is not None: + logger.warning("DirectRLEnvCfg.num_actions is deprecated. Use DirectRLEnvCfg.action_space instead.") + if isinstance(self.cfg.action_space, type(MISSING)): + self.cfg.action_space = self.cfg.num_actions + if self.cfg.num_observations is not None: + logger.warning( + "DirectRLEnvCfg.num_observations is deprecated. Use DirectRLEnvCfg.observation_space instead." + ) + if isinstance(self.cfg.observation_space, type(MISSING)): + self.cfg.observation_space = self.cfg.num_observations + if self.cfg.num_states is not None: + logger.warning("DirectRLEnvCfg.num_states is deprecated. Use DirectRLEnvCfg.state_space instead.") + if isinstance(self.cfg.state_space, type(MISSING)): + self.cfg.state_space = self.cfg.num_states + + # set up spaces + self.single_observation_space = gym.spaces.Dict() + self.single_observation_space["policy"] = spec_to_gym_space(self.cfg.observation_space) + self.single_action_space = spec_to_gym_space(self.cfg.action_space) + + # batch the spaces for vectorized environments + self.observation_space = gym.vector.utils.batch_space(self.single_observation_space["policy"], self.num_envs) + self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs) + + # optional state space for asymmetric actor-critic architectures + self.state_space = None + if self.cfg.state_space: + self.single_observation_space["critic"] = spec_to_gym_space(self.cfg.state_space) + self.state_space = gym.vector.utils.batch_space(self.single_observation_space["critic"], self.num_envs) + + # instantiate actions (needed for tasks for which the observations computation is dependent on the actions) + self.actions = sample_space(self.single_action_space, self.sim.device, batch_size=self.num_envs, fill_value=0) + + def _reset_idx(self, mask: wp.array | None = None): + """Reset environments based on a boolean mask. + + Args: + mask: Boolean mask indicating which environments to reset. + Shape is (num_envs,). If None, all environments are reset. + """ + if mask is None: + mask = self._ALL_ENV_MASK + self.scene.reset(env_ids=None, env_mask=mask) + + # apply events such as randomization for environments that need a reset + # if self.cfg.events: + # if "reset" in self.event_manager.available_modes: + # env_step_count = self._sim_step_counter // self.cfg.decimation + # self.event_manager.apply(mode="reset", env_ids=env_ids, global_env_step_count=env_step_count) + + # reset noise models + # if self.cfg.action_noise_model: + # self._action_noise_model.reset(env_ids) + # if self.cfg.observation_noise_model: + # self._observation_noise_model.reset(env_ids) + + # reset the episode length buffer + wp.launch( + zero_mask_int32, + dim=self.num_envs, + inputs=[ + mask, + self._episode_length_buf_wp, + ], + ) + + """ + Implementation-specific functions. + """ + + def _setup_scene(self): + """Setup the scene for the environment. + + This function is responsible for creating the scene objects and setting up the scene for the environment. + The scene creation can happen through :class:`isaaclab.scene.InteractiveSceneCfg` or through + directly creating the scene objects and registering them with the scene manager. + + We leave the implementation of this function to the derived classes. If the environment does not require + any explicit scene setup, the function can be left empty. + """ + pass + + @abstractmethod + def _pre_physics_step(self, actions: wp.array) -> None: + """Pre-process actions before stepping through the physics. + + This function is responsible for pre-processing the actions before stepping through the physics. + It is called before the physics stepping (which is decimated). + + Args: + actions: The actions to apply on the environment. Shape is (num_envs, action_dim). + """ + raise NotImplementedError(f"Please implement the '_pre_physics_step' method for {self.__class__.__name__}.") + + @abstractmethod + def _apply_action(self) -> None: + """Apply actions to the simulator. + + This function is responsible for applying the actions to the simulator. It is called at each + physics time-step. Must be pure warp (no torch ops) to be CUDA graph capturable. + """ + raise NotImplementedError(f"Please implement the '_apply_action' method for {self.__class__.__name__}.") + + @abstractmethod + def _get_observations(self) -> dict: + """Compute and return the observations for the environment. + + Returns: + The observations dictionary, e.g. ``{"policy": tensor}``. + """ + raise NotImplementedError(f"Please implement the '_get_observations' method for {self.__class__.__name__}.") + + def _get_states(self) -> VecEnvObs | None: + """Compute and return the states for the environment. + + The state-space is used for asymmetric actor-critic architectures. It is configured + using the :attr:`DirectRLEnvCfg.state_space` parameter. + + Returns: + The states for the environment. If the environment does not have a state-space, the function + returns a None. + """ + return None # noqa: R501 + + @abstractmethod + def _get_rewards(self) -> None: + """Compute the rewards for the environment. + + Writes results into the reward buffer (e.g., ``self.reward_buf``). + """ + raise NotImplementedError(f"Please implement the '_get_rewards' method for {self.__class__.__name__}.") + + @abstractmethod + def _get_dones(self) -> None: + """Compute the done flags for the environment. + + Writes results into the done buffers (e.g., ``self.reset_terminated``, ``self.reset_time_outs``). + """ + raise NotImplementedError(f"Please implement the '_get_dones' method for {self.__class__.__name__}.") + + def _set_debug_vis_impl(self, debug_vis: bool): + """Set debug visualization into visualization objects. + + This function is responsible for creating the visualization objects if they don't exist + and input ``debug_vis`` is True. If the visualization objects exist, the function should + set their visibility into the stage. + """ + raise NotImplementedError(f"Debug visualization is not implemented for {self.__class__.__name__}.") diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py b/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py new file mode 100644 index 000000000000..f792b994f67b --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py @@ -0,0 +1,44 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp-native interactive scene with env_mask support for reset.""" + +from __future__ import annotations + +from collections.abc import Sequence + +import warp as wp + +from isaaclab.scene import InteractiveScene + + +class InteractiveSceneWarp(InteractiveScene): + """Interactive scene with warp-native env_mask support for reset. + + Extends :class:`InteractiveScene` to accept a boolean warp mask for selective resets, + avoiding the need to convert between env_ids and masks. + """ + + def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None = None): + """Reset scene entities using either env_ids or a boolean env_mask. + + Args: + env_ids: The indices of the environments to reset. Defaults to None (all instances). + env_mask: Boolean warp mask of shape (num_envs,). Defaults to None. + """ + # -- assets (support env_mask) + for articulation in self._articulations.values(): + articulation.reset(env_ids, env_mask=env_mask) + for deformable_object in self._deformable_objects.values(): + deformable_object.reset(env_ids) + for rigid_object in self._rigid_objects.values(): + rigid_object.reset(env_ids, env_mask=env_mask) + for surface_gripper in self._surface_grippers.values(): + surface_gripper.reset(env_ids) + for rigid_object_collection in self._rigid_object_collections.values(): + rigid_object_collection.reset(env_ids, env_mask=env_mask) + # -- sensors (no env_mask support) + for sensor in self._sensors.values(): + sensor.reset(env_ids) diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py new file mode 100644 index 000000000000..d28381b15b76 --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Sub-package for environment utils.""" diff --git a/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py b/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py new file mode 100644 index 000000000000..d2ac09bd4f70 --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py @@ -0,0 +1,73 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp CUDA graph capture-or-replay utility.""" + +from collections.abc import Callable +from typing import Any + +import warp as wp + + +class WarpGraphCache: + """Caches Warp CUDA graphs by stage name: captures on first call, replays after. + + On the first call for a given stage, the function is recorded into a CUDA + graph and then **immediately replayed** so the graph is validated right away. + The return value from the capture run is cached and returned on every + subsequent replay, ensuring captured stages return the same references + (e.g. tensor views) as eager stages. + + Usage:: + + cache = WarpGraphCache() + result = cache.capture_or_replay("my_stage", my_warp_function) + # uncaptured work here ... + result2 = cache.capture_or_replay("my_stage_post", my_other_function) + """ + + def __init__(self): + self._graphs: dict[str, Any] = {} + self._results: dict[str, Any] = {} + + def capture_or_replay( + self, + stage: str, + fn: Callable[..., Any], + args: tuple = (), + kwargs: dict[str, Any] | None = None, + ) -> Any: + """Capture *fn* into a CUDA graph on the first call, then replay. + + Args: + stage: Unique name identifying this captured scope. + fn: The callable to capture. Must contain only CUDA-graph-safe + operations (pure warp kernels, no Python-level branching on + GPU data). + args: Positional arguments forwarded to *fn*. Defaults to ``()``. + kwargs: Keyword arguments forwarded to *fn*. Defaults to ``None``. + + Returns: + The cached return value from the first (capture) invocation. + """ + if kwargs is None: + kwargs = {} + graph = self._graphs.get(stage) + if graph is None: + with wp.ScopedCapture() as capture: + result = fn(*args, **kwargs) + self._graphs[stage] = capture.graph + self._results[stage] = result + wp.capture_launch(self._graphs[stage]) + return self._results[stage] + + def invalidate(self, stage: str | None = None) -> None: + """Drop cached graph(s). If *stage* is ``None``, drop all.""" + if stage is None: + self._graphs.clear() + self._results.clear() + else: + self._graphs.pop(stage, None) + self._results.pop(stage, None) diff --git a/source/isaaclab_experimental/pyproject.toml b/source/isaaclab_experimental/pyproject.toml new file mode 100644 index 000000000000..d90ac3536f16 --- /dev/null +++ b/source/isaaclab_experimental/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel", "toml"] +build-backend = "setuptools.build_meta" diff --git a/source/isaaclab_experimental/setup.py b/source/isaaclab_experimental/setup.py new file mode 100644 index 000000000000..d44ab67d789e --- /dev/null +++ b/source/isaaclab_experimental/setup.py @@ -0,0 +1,44 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Installation script for the 'isaaclab_experimental' python package.""" + +import os + +import toml +from setuptools import find_packages, setup + +# Obtain the extension data from the extension.toml file +EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__)) +# Read the extension.toml file +EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml")) + +# Minimum dependencies required prior to installation +INSTALL_REQUIRES = [ + "toml", +] + +# Installation operation +setup( + name="isaaclab_experimental", + author="Isaac Lab Project Developers", + maintainer="Isaac Lab Project Developers", + url=EXTENSION_TOML_DATA["package"]["repository"], + version=EXTENSION_TOML_DATA["package"]["version"], + description=EXTENSION_TOML_DATA["package"]["description"], + keywords=EXTENSION_TOML_DATA["package"]["keywords"], + license="BSD-3-Clause", + include_package_data=True, + python_requires=">=3.10", + install_requires=INSTALL_REQUIRES, + packages=find_packages(), + classifiers=[ + "Natural Language :: English", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Isaac Sim :: 5.0.0", + ], + zip_safe=False, +) diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index e7e52ca43a96..7e753c0f7a2a 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -47,7 +47,16 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N # NOTE: import here (not at module level) to avoid loading heavy env classes before Isaac Sim is initialized. from isaaclab.envs import DirectRLEnv, ManagerBasedEnv, ManagerBasedRLEnv - if not isinstance(env.unwrapped, (ManagerBasedRLEnv, ManagerBasedEnv, DirectRLEnv)): + try: + from isaaclab_experimental.envs import DirectRLEnvWarp + except ImportError: + DirectRLEnvWarp = None + + allowed_types = (ManagerBasedRLEnv, ManagerBasedEnv, DirectRLEnv) + if DirectRLEnvWarp is not None: + allowed_types += (DirectRLEnvWarp,) + + if not isinstance(env.unwrapped, allowed_types): raise ValueError( "The environment must be inherited from ManagerBasedRLEnv or DirectRLEnv. Environment type:" f" {type(env)}" diff --git a/source/isaaclab_tasks_experimental/config/extension.toml b/source/isaaclab_tasks_experimental/config/extension.toml new file mode 100644 index 000000000000..75de8d6da0ef --- /dev/null +++ b/source/isaaclab_tasks_experimental/config/extension.toml @@ -0,0 +1,22 @@ +[package] + +# Note: Semantic Versioning is used: https://semver.org/ +version = "0.0.1" + +# Description +title = "Experimental environments for IsaacLab" +description="Extension containing suite of experimental environments for robot learning." +readme = "docs/README.md" +repository = "https://github.com/isaac-sim/IsaacLab" +category = "robotics" +keywords = ["robotics", "rl", "il", "learning"] + +[dependencies] +"isaaclab" = {} +"isaaclab_assets" = {} + +[core] +reloadable = false + +[[python.module]] +name = "isaaclab_tasks.experimental" diff --git a/source/isaaclab_tasks_experimental/docs/README.md b/source/isaaclab_tasks_experimental/docs/README.md new file mode 100644 index 000000000000..d9e681518d64 --- /dev/null +++ b/source/isaaclab_tasks_experimental/docs/README.md @@ -0,0 +1,3 @@ +# Isaac Lab: Experimental Environment Suite + +Experimental environments for robot learning built on top of Isaac Lab. diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py new file mode 100644 index 000000000000..918c41d73d7b --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py @@ -0,0 +1,32 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Package containing task implementations for various robotic environments.""" + +import os +import toml + +# Conveniences to other module directories via relative paths +ISAACLAB_TASKS_EXPERIMENTAL_EXT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../")) +"""Path to the extension source directory.""" + +ISAACLAB_TASKS_EXPERIMENTAL_METADATA = toml.load( + os.path.join(ISAACLAB_TASKS_EXPERIMENTAL_EXT_DIR, "config", "extension.toml") +) +"""Extension metadata dictionary parsed from the extension.toml file.""" + +# Configure the module-level variables +__version__ = ISAACLAB_TASKS_EXPERIMENTAL_METADATA["package"]["version"] + +## +# Register Gym environments. +## + +from isaaclab_tasks.utils import import_packages + +# The blacklist is used to prevent importing configs from sub-packages +_BLACKLIST_PKGS = ["utils", ".mdp"] +# Import all configs in this package +import_packages(__name__, _BLACKLIST_PKGS) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py new file mode 100644 index 000000000000..3e2b7945ebde --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Direct workflow environments. +""" + +import gymnasium as gym diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py new file mode 100644 index 000000000000..c954081b9c54 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Allegro Inhand Manipulation environment. +""" + +import gymnasium as gym + +## +# Register Gym environments. +## + +inhand_task_entry = "isaaclab_tasks_experimental.direct.inhand_manipulation" +stable_agents = "isaaclab_tasks.direct.allegro_hand.agents" + +gym.register( + id="Isaac-Repose-Cube-Allegro-Direct-Warp-v0", + entry_point=f"{inhand_task_entry}.inhand_manipulation_warp_env:InHandManipulationWarpEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.allegro_hand_warp_env_cfg:AllegroHandWarpEnvCfg", + "rl_games_cfg_entry_point": f"{stable_agents}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{stable_agents}.rsl_rl_ppo_cfg:AllegroHandPPORunnerCfg", + "skrl_cfg_entry_point": f"{stable_agents}:skrl_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py new file mode 100644 index 000000000000..2d607d475329 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py @@ -0,0 +1,136 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg # , RigidObjectCfg +from isaaclab.envs import DirectRLEnvCfg +from isaaclab.markers import VisualizationMarkersCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim import SimulationCfg +from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR + +from isaaclab_assets.robots.allegro import ALLEGRO_HAND_CFG + + +@configclass +class AllegroHandWarpEnvCfg(DirectRLEnvCfg): + # env + decimation = 4 + episode_length_s = 10.0 + action_space = 16 + observation_space = 124 # (full) + state_space = 0 + asymmetric_obs = False + obs_type = "full" + + solver_cfg = MJWarpSolverCfg( + solver="newton", + integrator="implicitfast", + njmax=80, + nconmax=70, + impratio=10.0, + cone="elliptic", + update_data_interval=2, + iterations=100, + ls_iterations=15, + ls_parallel=False, + # save_to_mjcf="AllegroHand.xml", + ) + + newton_cfg = NewtonCfg( + solver_cfg=solver_cfg, + num_substeps=2, + debug_mode=False, + ) + # simulation + sim: SimulationCfg = SimulationCfg( + dt=1 / 120, + render_interval=decimation, + physics_material=RigidBodyMaterialCfg( + static_friction=1.0, + dynamic_friction=1.0, + ), + physics=newton_cfg, + ) + # robot + robot_cfg: ArticulationCfg = ALLEGRO_HAND_CFG.replace(prim_path="/World/envs/env_.*/Robot") + + actuated_joint_names = [ + "index_joint_0", + "middle_joint_0", + "ring_joint_0", + "thumb_joint_0", + "index_joint_1", + "index_joint_2", + "index_joint_3", + "middle_joint_1", + "middle_joint_2", + "middle_joint_3", + "ring_joint_1", + "ring_joint_2", + "ring_joint_3", + "thumb_joint_1", + "thumb_joint_2", + "thumb_joint_3", + ] + fingertip_body_names = [ + "index_link_3", + "middle_link_3", + "ring_link_3", + "thumb_link_3", + ] + + # in-hand object + object_cfg: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/object", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd", + mass_props=sim_utils.MassPropertiesCfg(density=400.0), + scale=(1.2, 1.2, 1.2), + ), + # FIXME: it does seem to be a bug for ArticulationCfg for handling empty joint list + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, -0.17, 0.565), rot=(0.0, 0.0, 0.0, 1.0), joint_pos={}, joint_vel={} + ), + actuators={}, + articulation_root_prim_path="", + ) + # goal object + goal_object_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg( + prim_path="/Visuals/goal_marker", + markers={ + "goal": sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd", + scale=(1.2, 1.2, 1.2), + ) + }, + ) + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg( + num_envs=8192, env_spacing=0.75, replicate_physics=True, clone_in_fabric=True + ) + # reset + reset_position_noise = 0.01 # range of position at reset + reset_dof_pos_noise = 0.2 # range of dof pos at reset + reset_dof_vel_noise = 0.0 # range of dof vel at reset + # reward scales + dist_reward_scale = -10.0 + rot_reward_scale = 1.0 + rot_eps = 0.1 + action_penalty_scale = -0.0002 + reach_goal_bonus = 250 + fall_penalty = 0 + fall_dist = 0.24 + vel_obs_scale = 0.2 + success_tolerance = 0.2 + max_consecutive_success = 0 + av_factor = 0.1 + act_moving_average = 1.0 + force_torque_obs_scale = 10.0 diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/__init__.py new file mode 100644 index 000000000000..d5f4b459a8a6 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/__init__.py @@ -0,0 +1,28 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Ant locomotion environment. +""" + +import gymnasium as gym + +## +# Register Gym environments. +## + +stable_agents = "isaaclab_tasks.direct.ant.agents" + +gym.register( + id="Isaac-Ant-Direct-Warp-v0", + entry_point=f"{__name__}.ant_env_warp:AntWarpEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.ant_env_warp:AntWarpEnvCfg", + "rl_games_cfg_entry_point": f"{stable_agents}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{stable_agents}.rsl_rl_ppo_cfg:AntPPORunnerCfg", + "skrl_cfg_entry_point": f"{stable_agents}:skrl_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/ant_env_warp.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/ant_env_warp.py new file mode 100644 index 000000000000..a2c8f91fa3db --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/ant_env_warp.py @@ -0,0 +1,91 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg +from isaaclab.envs import DirectRLEnvCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim import SimulationCfg +from isaaclab.terrains import TerrainImporterCfg +from isaaclab.utils import configclass + +from isaaclab_assets import ANT_CFG + +from isaaclab_tasks_experimental.direct.locomotion.locomotion_env_warp import LocomotionWarpEnv + + +@configclass +class AntWarpEnvCfg(DirectRLEnvCfg): + # env + episode_length_s = 15.0 + decimation = 2 + action_scale = 0.5 + action_space = 8 + observation_space = 36 + state_space = 0 + + solver_cfg = MJWarpSolverCfg( + njmax=45, + nconmax=25, + cone="pyramidal", + integrator="implicitfast", + impratio=1, + ) + newton_cfg = NewtonCfg( + solver_cfg=solver_cfg, + num_substeps=1, + debug_mode=False, + use_cuda_graph=True, + ) + + # simulation + sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation, physics=newton_cfg) + terrain = TerrainImporterCfg( + prim_path="/World/ground", + terrain_type="plane", + collision_group=-1, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="average", + restitution_combine_mode="average", + static_friction=1.0, + dynamic_friction=1.0, + restitution=0.0, + ), + debug_vis=False, + ) + + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg( + num_envs=4096, env_spacing=4.0, replicate_physics=True, clone_in_fabric=True + ) + + # robot + robot: ArticulationCfg = ANT_CFG.replace(prim_path="/World/envs/env_.*/Robot") + joint_gears: list = [15, 15, 15, 15, 15, 15, 15, 15] + + heading_weight: float = 0.5 + up_weight: float = 0.1 + + energy_cost_scale: float = 0.05 + actions_cost_scale: float = 0.005 + alive_reward_scale: float = 0.5 + dof_vel_scale: float = 0.2 + + death_cost: float = -2.0 + termination_height: float = 0.31 + + angular_velocity_scale: float = 1.0 + contact_force_scale: float = 0.1 + + +class AntWarpEnv(LocomotionWarpEnv): + cfg: AntWarpEnvCfg + + def __init__(self, cfg: AntWarpEnvCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/__init__.py new file mode 100644 index 000000000000..19b9f26a287c --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Cartpole balancing environment. +""" + +import gymnasium as gym + +## +# Register Gym environments. +## + +stable_agents = "isaaclab_tasks.direct.cartpole.agents" + +gym.register( + id="Isaac-Cartpole-Direct-Warp-v0", + entry_point=f"{__name__}.cartpole_warp_env:CartpoleWarpEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.cartpole_warp_env:CartpoleWarpEnvCfg", + "rl_games_cfg_entry_point": f"{stable_agents}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{stable_agents}.rsl_rl_ppo_cfg:CartpolePPORunnerCfg", + "skrl_cfg_entry_point": f"{stable_agents}:skrl_ppo_cfg.yaml", + "sb3_cfg_entry_point": f"{stable_agents}:sb3_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/cartpole_warp_env.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/cartpole_warp_env.py new file mode 100644 index 000000000000..a9670c6b8dd4 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/cartpole_warp_env.py @@ -0,0 +1,357 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import math + +import warp as wp +from isaaclab_experimental.envs import DirectRLEnvWarp +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation, ArticulationCfg +from isaaclab.envs import DirectRLEnvCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim import SimulationCfg +from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane +from isaaclab.utils import configclass + +from isaaclab_assets.robots.cartpole import CARTPOLE_CFG + + +@configclass +class CartpoleWarpEnvCfg(DirectRLEnvCfg): + # env + decimation = 2 + episode_length_s = 5.0 + action_scale = 100.0 # [N] + action_space = 1 + observation_space = 4 + state_space = 0 + + solver_cfg = MJWarpSolverCfg( + njmax=5, + nconmax=3, + cone="pyramidal", + integrator="implicitfast", + impratio=1, + ) + + newton_cfg = NewtonCfg( + solver_cfg=solver_cfg, + num_substeps=1, + debug_mode=False, + use_cuda_graph=True, + ) + + # simulation + sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation, physics=newton_cfg) + + # robot + robot_cfg: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="/World/envs/env_.*/Robot") + cart_dof_name = "slider_to_cart" + pole_dof_name = "cart_to_pole" + + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg( + num_envs=4096, env_spacing=4.0, replicate_physics=True, clone_in_fabric=True + ) + + # reset + max_cart_pos = 3.0 # the cart is reset if it exceeds that position [m] + initial_pole_angle_range = [-0.25, 0.25] # the range in which the pole angle is sampled from on reset [x pi rad] + + # reward scales + rew_scale_alive = 1.0 + rew_scale_terminated = -2.0 + rew_scale_pole_pos = -1.0 + rew_scale_cart_vel = -0.01 + rew_scale_pole_vel = -0.005 + + +@wp.kernel +def get_observations( + joint_pos: wp.array2d(dtype=wp.float32), + joint_vel: wp.array2d(dtype=wp.float32), + cart_dof_idx: wp.int32, + pole_dof_idx: wp.int32, + observations: wp.array(dtype=wp.vec4f), +): + env_index = wp.tid() + observations[env_index][0] = joint_pos[env_index, pole_dof_idx] + observations[env_index][1] = joint_vel[env_index, pole_dof_idx] + observations[env_index][2] = joint_pos[env_index, cart_dof_idx] + observations[env_index][3] = joint_vel[env_index, cart_dof_idx] + + +@wp.kernel +def update_actions( + input_actions: wp.array2d(dtype=wp.float32), + actions: wp.array2d(dtype=wp.float32), + action_scale: wp.float32, + cart_dof_idx: wp.int32, +): + env_index = wp.tid() + actions[env_index, cart_dof_idx] = action_scale * input_actions[env_index, 0] + + +@wp.kernel +def get_dones( + joint_pos: wp.array2d(dtype=wp.float32), + episode_length_buf: wp.array(dtype=wp.int32), + cart_dof_idx: wp.int32, + pole_dof_idx: wp.int32, + max_episode_length: wp.int32, + max_cart_pos: wp.float32, + out_of_bounds: wp.array(dtype=wp.bool), + time_out: wp.array(dtype=wp.bool), + reset: wp.array(dtype=wp.bool), +): + env_index = wp.tid() + out_of_bounds[env_index] = (wp.abs(joint_pos[env_index, cart_dof_idx]) > max_cart_pos) or ( + wp.abs(joint_pos[env_index, pole_dof_idx]) > math.pi / 2.0 + ) + time_out[env_index] = episode_length_buf[env_index] >= (max_episode_length - 1) + reset[env_index] = out_of_bounds[env_index] or time_out[env_index] + + +@wp.func +def compute_rew_alive(rew_scale_alive: wp.float32, reset_terminated: bool) -> wp.float32: + if reset_terminated: + return wp.float32(0.0) + return rew_scale_alive + + +@wp.func +def compute_rew_termination(rew_scale_terminated: wp.float32, reset_terminated: bool) -> wp.float32: + if reset_terminated: + return rew_scale_terminated + return wp.float32(0.0) + + +@wp.func +def compute_rew_pole_pos( + rew_scale_pole_pos: wp.float32, + pole_pos: wp.float32, +) -> wp.float32: + return rew_scale_pole_pos * pole_pos * pole_pos + + +@wp.func +def compute_rew_cart_vel( + rew_scale_cart_vel: wp.float32, + cart_vel: wp.float32, +) -> wp.float32: + return rew_scale_cart_vel * wp.abs(cart_vel) + + +@wp.func +def compute_rew_pole_vel( + rew_scale_pole_vel: wp.float32, + pole_vel: wp.float32, +) -> wp.float32: + return rew_scale_pole_vel * wp.abs(pole_vel) + + +@wp.kernel +def compute_rewards( + rew_scale_alive: wp.float32, + rew_scale_terminated: wp.float32, + rew_scale_pole_pos: wp.float32, + rew_scale_cart_vel: wp.float32, + rew_scale_pole_vel: wp.float32, + joint_pos: wp.array2d(dtype=wp.float32), + joint_vel: wp.array2d(dtype=wp.float32), + cart_dof_idx: wp.int32, + pole_dof_idx: wp.int32, + reset_terminated: wp.array(dtype=wp.bool), + reward: wp.array(dtype=wp.float32), +): + env_index = wp.tid() + reward[env_index] = ( + compute_rew_alive(rew_scale_alive, reset_terminated[env_index]) + + compute_rew_termination(rew_scale_terminated, reset_terminated[env_index]) + + compute_rew_pole_pos(rew_scale_pole_pos, joint_pos[env_index, pole_dof_idx]) + + compute_rew_cart_vel(rew_scale_cart_vel, joint_vel[env_index, cart_dof_idx]) + + compute_rew_pole_vel(rew_scale_pole_vel, joint_vel[env_index, pole_dof_idx]) + ) + + +@wp.kernel +def reset( + default_joint_pos: wp.array2d(dtype=wp.float32), + default_joint_vel: wp.array2d(dtype=wp.float32), + joint_pos: wp.array2d(dtype=wp.float32), + joint_vel: wp.array2d(dtype=wp.float32), + cart_dof_idx: wp.int32, + pole_dof_idx: wp.int32, + initial_pose_angle_range: wp.vec2f, + env_mask: wp.array(dtype=wp.bool), + state: wp.array(dtype=wp.uint32), +): + env_index = wp.tid() + if env_mask[env_index]: + joint_pos[env_index, cart_dof_idx] = default_joint_pos[env_index, cart_dof_idx] + joint_pos[env_index, pole_dof_idx] = default_joint_pos[env_index, pole_dof_idx] + wp.randf( + state[env_index], initial_pose_angle_range[0] * wp.pi, initial_pose_angle_range[1] * wp.pi + ) + joint_vel[env_index, cart_dof_idx] = default_joint_vel[env_index, cart_dof_idx] + joint_vel[env_index, pole_dof_idx] = default_joint_vel[env_index, pole_dof_idx] + state[env_index] += wp.uint32(1) + + +@wp.kernel +def initialize_state( + state: wp.array(dtype=wp.uint32), + seed: wp.int32, +): + env_index = wp.tid() + state[env_index] = wp.rand_init(seed, env_index) + + +class CartpoleWarpEnv(DirectRLEnvWarp): + cfg: CartpoleWarpEnvCfg + + def __init__(self, cfg: CartpoleWarpEnvCfg, render_mode: str | None = None, **kwargs) -> None: + super().__init__(cfg, render_mode, **kwargs) + + # Get the indices (develop API: find_joints returns (indices, names)) + self._cart_dof_idx, _ = self.cartpole.find_joints(self.cfg.cart_dof_name) + self._pole_dof_idx, _ = self.cartpole.find_joints(self.cfg.pole_dof_name) + + self.action_scale = self.cfg.action_scale + + # Simulation bindings + # Note: these are direct memory views into the Newton simulation data, they should not be modified directly + self.joint_pos = self.cartpole.data.joint_pos + self.joint_vel = self.cartpole.data.joint_vel + + # Buffers + self.observations = wp.zeros((self.num_envs), dtype=wp.vec4f, device=self.device) + self.actions = wp.zeros((self.num_envs, self.cartpole.num_joints), dtype=wp.float32, device=self.device) + self.rewards = wp.zeros((self.num_envs), dtype=wp.float32, device=self.device) + self.states = wp.zeros((self.num_envs), dtype=wp.uint32, device=self.device) + + if self.cfg.seed is None: + self.cfg.seed = -1 + + wp.launch( + initialize_state, + dim=self.num_envs, + inputs=[ + self.states, + self.cfg.seed, + ], + ) + + # Bind torch buffers to warp buffers + self.torch_obs_buf = wp.to_torch(self.observations) + self.torch_reward_buf = wp.to_torch(self.rewards) + self.torch_reset_terminated = wp.to_torch(self.reset_terminated) + self.torch_reset_time_outs = wp.to_torch(self.reset_time_outs) + self.torch_episode_length_buf = self.episode_length_buf # already a torch tensor via wp.to_torch + + def _setup_scene(self) -> None: + self.cartpole = Articulation(self.cfg.robot_cfg) + # add ground plane + spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg()) + # clone and replicate + self.scene.clone_environments(copy_from_source=False) + # we need to explicitly filter collisions for CPU simulation + if self.device == "cpu": + self.scene.filter_collisions(global_prim_paths=[]) + # add articulation to scene + self.scene.articulations["cartpole"] = self.cartpole + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _pre_physics_step(self, actions: wp.array) -> None: + wp.launch( + update_actions, + dim=self.num_envs, + inputs=[ + actions, + self.actions, + self.action_scale, + self._cart_dof_idx[0], + ], + ) + + def _apply_action(self) -> None: + self.cartpole.set_joint_effort_target_mask(target=self.actions) + + def _get_observations(self) -> dict: + wp.launch( + get_observations, + dim=self.num_envs, + inputs=[ + self.joint_pos, + self.joint_vel, + self._cart_dof_idx[0], + self._pole_dof_idx[0], + self.observations, + ], + ) + return {"policy": self.torch_obs_buf} + + def _get_rewards(self) -> None: + wp.launch( + compute_rewards, + dim=self.num_envs, + inputs=[ + self.cfg.rew_scale_alive, + self.cfg.rew_scale_terminated, + self.cfg.rew_scale_pole_pos, + self.cfg.rew_scale_cart_vel, + self.cfg.rew_scale_pole_vel, + self.joint_pos, + self.joint_vel, + self._cart_dof_idx[0], + self._pole_dof_idx[0], + self.reset_terminated, + self.rewards, + ], + ) + + def _get_dones(self) -> None: + wp.launch( + get_dones, + dim=self.num_envs, + inputs=[ + self.joint_pos, + self._episode_length_buf_wp, + self._cart_dof_idx[0], + self._pole_dof_idx[0], + self.max_episode_length, + self.cfg.max_cart_pos, + self.reset_terminated, + self.reset_time_outs, + self.reset_buf, + ], + ) + + def _reset_idx(self, mask: wp.array | None = None) -> None: + if mask is None: + mask = self._ALL_ENV_MASK + + super()._reset_idx(mask) + + wp.launch( + reset, + dim=self.num_envs, + inputs=[ + self.cartpole.data.default_joint_pos, + self.cartpole.data.default_joint_vel, + self.joint_pos, + self.joint_vel, + self._cart_dof_idx[0], + self._pole_dof_idx[0], + self.cfg.initial_pole_angle_range, + mask, + self.states, + ], + ) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/__init__.py new file mode 100644 index 000000000000..8dcb10a57bfd --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/__init__.py @@ -0,0 +1,28 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Humanoid locomotion environment. +""" + +import gymnasium as gym + +## +# Register Gym environments. +## + +stable_agents = "isaaclab_tasks.direct.humanoid.agents" + +gym.register( + id="Isaac-Humanoid-Direct-Warp-v0", + entry_point=f"{__name__}.humanoid_warp_env:HumanoidWarpEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.humanoid_warp_env:HumanoidWarpEnvCfg", + "rl_games_cfg_entry_point": f"{stable_agents}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{stable_agents}.rsl_rl_ppo_cfg:HumanoidPPORunnerCfg", + "skrl_cfg_entry_point": f"{stable_agents}:skrl_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/humanoid_warp_env.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/humanoid_warp_env.py new file mode 100644 index 000000000000..1dc4d23f7814 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/humanoid_warp_env.py @@ -0,0 +1,114 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg +from isaaclab.envs import DirectRLEnvCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim import SimulationCfg +from isaaclab.terrains import TerrainImporterCfg +from isaaclab.utils import configclass + +from isaaclab_assets import HUMANOID_CFG + +from isaaclab_tasks_experimental.direct.locomotion.locomotion_env_warp import LocomotionWarpEnv + + +@configclass +class HumanoidWarpEnvCfg(DirectRLEnvCfg): + # env + episode_length_s = 15.0 + decimation = 2 + action_scale = 1.0 + action_space = 21 + observation_space = 75 + state_space = 0 + + solver_cfg = MJWarpSolverCfg( + njmax=80, + nconmax=25, + cone="pyramidal", + integrator="implicitfast", + update_data_interval=2, + impratio=1, + ) + newton_cfg = NewtonCfg( + solver_cfg=solver_cfg, + num_substeps=2, + debug_mode=False, + use_cuda_graph=True, + ) + + # simulation + sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation, physics=newton_cfg) + terrain = TerrainImporterCfg( + prim_path="/World/ground", + terrain_type="plane", + collision_group=-1, + physics_material=sim_utils.RigidBodyMaterialCfg( + friction_combine_mode="average", + restitution_combine_mode="average", + static_friction=1.0, + dynamic_friction=1.0, + restitution=0.0, + ), + debug_vis=False, + ) + + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg( + num_envs=4096, env_spacing=4.0, replicate_physics=True, clone_in_fabric=True + ) + + # robot + robot: ArticulationCfg = HUMANOID_CFG.replace(prim_path="/World/envs/env_.*/Robot") + joint_gears: list = [ + 67.5000, # left_upper_arm + 67.5000, # left_upper_arm + 45.0000, # left_lower_arm + 67.5000, # lower_waist + 67.5000, # lower_waist + 67.5000, # pelvis + 45.0000, # left_thigh: x + 135.0000, # left_thigh: y + 45.0000, # left_thigh: z + 90.0000, # left_knee + 22.5, # left_foot + 22.5, # left_foot + 45.0000, # right_thigh: x + 135.0000, # right_thigh: y + 45.0000, # right_thigh: z + 90.0000, # right_knee + 22.5, # right_foot + 22.5, # right_foot + 67.5000, # right_upper_arm + 67.5000, # right_upper_arm + 45.0000, # right_lower_arm + ] + + heading_weight: float = 0.5 + up_weight: float = 0.1 + + energy_cost_scale: float = 0.05 + actions_cost_scale: float = 0.01 + alive_reward_scale: float = 2.0 + dof_vel_scale: float = 0.1 + + death_cost: float = -1.0 + termination_height: float = 0.8 + + angular_velocity_scale: float = 0.25 + contact_force_scale: float = 0.01 + + +class HumanoidWarpEnv(LocomotionWarpEnv): + cfg: HumanoidWarpEnvCfg + + def __init__(self, cfg: HumanoidWarpEnvCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py new file mode 100644 index 000000000000..460a30569089 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py new file mode 100644 index 000000000000..8b142cf8e2ee --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py @@ -0,0 +1,980 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + + +from __future__ import annotations + +from collections.abc import Sequence +from typing import TYPE_CHECKING + +import torch +import warp as wp +from isaaclab_experimental.envs import DirectRLEnvWarp + +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation # , RigidObject +from isaaclab.markers import VisualizationMarkers +from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane + +if TYPE_CHECKING: + from isaaclab_tasks_experimental.direct.allegro_hand.allegro_hand_warp_env_cfg import AllegroHandWarpEnvCfg + + +@wp.kernel +def initialize_rng_state( + # input + seed: wp.int32, + # output + state: wp.array(dtype=wp.uint32), +): + env_id = wp.tid() + state[env_id] = wp.rand_init(seed, wp.int32(env_id)) + + +@wp.kernel +def apply_actions_to_targets( + # input + actions: wp.array2d(dtype=wp.float32), + lower_limits: wp.array2d(dtype=wp.float32), + upper_limits: wp.array2d(dtype=wp.float32), + actuated_dof_indices: wp.array(dtype=wp.int32), + act_moving_average: wp.float32, + # input/output + prev_targets: wp.array2d(dtype=wp.float32), + # output + cur_targets: wp.array2d(dtype=wp.float32), +): + env_id, i = wp.tid() + dof_id = actuated_dof_indices[i] + + # clamp and scale action to target range + a = wp.clamp(actions[env_id, i], wp.float32(-1.0), wp.float32(1.0)) + lower = lower_limits[env_id, dof_id] + upper = upper_limits[env_id, dof_id] + t = scale(a, lower, upper) + + # smoothing and boundary clamping + t = act_moving_average * t + (wp.float32(1.0) - act_moving_average) * prev_targets[env_id, dof_id] + t = wp.clamp(t, lower, upper) + + # update targets + cur_targets[env_id, dof_id] = t + prev_targets[env_id, dof_id] = t + + +@wp.kernel +def reset_target_pose( + # input + env_mask: wp.array(dtype=wp.bool), + x_unit_vec: wp.vec3f, + y_unit_vec: wp.vec3f, + env_origins: wp.array(dtype=wp.vec3f), + goal_pos: wp.array(dtype=wp.vec3f), + # input/output + rng_state: wp.array(dtype=wp.uint32), + # output + goal_rot: wp.array(dtype=wp.quatf), + reset_goal_buf: wp.array(dtype=wp.bool), + goal_pos_w: wp.array(dtype=wp.vec3f), +): + env_id = wp.tid() + if env_mask[env_id]: + rand0 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + rand1 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + + goal_rot[env_id] = randomize_rotation(rand0, rand1, x_unit_vec, y_unit_vec) + reset_goal_buf[env_id] = False + + # Warp-native addition: goal position in world frame. + goal_pos_w[env_id] = goal_pos[env_id] + env_origins[env_id] + + +@wp.kernel +def reset_object( + # input + default_root_pose: wp.array(dtype=wp.transformf), + env_origins: wp.array(dtype=wp.vec3f), + reset_position_noise: wp.float32, + x_unit_vec: wp.vec3f, + y_unit_vec: wp.vec3f, + env_mask: wp.array(dtype=wp.bool), + # input/output + rng_state: wp.array(dtype=wp.uint32), + # output + root_pose_w: wp.array(dtype=wp.transformf), + root_vel_w: wp.array(dtype=wp.spatial_vectorf), +): + env_id = wp.tid() + if env_mask[env_id]: + nx = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + ny = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + nz = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + + pos_noise = reset_position_noise * wp.vec3f(nx, ny, nz) + base_pos = wp.transform_get_translation(default_root_pose[env_id]) + pos_w = base_pos + env_origins[env_id] + pos_noise + + rand0 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + rand1 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + rot_w = randomize_rotation(rand0, rand1, x_unit_vec, y_unit_vec) + + # The following should be equivalent, but consider using write_root_pose_to_sim and write_root_velocity_to_sim + root_pose_w[env_id] = wp.transform(pos_w, rot_w) + root_vel_w[env_id] = wp.spatial_vectorf( + wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0) + ) + + +@wp.kernel +def reset_hand( + # input + default_joint_pos: wp.array2d(dtype=wp.float32), + default_joint_vel: wp.array2d(dtype=wp.float32), + lower_limits: wp.array2d(dtype=wp.float32), + upper_limits: wp.array2d(dtype=wp.float32), + reset_dof_pos_noise: wp.float32, + reset_dof_vel_noise: wp.float32, + env_mask: wp.array(dtype=wp.bool), + num_dofs: wp.int32, + # input/output + rng_state: wp.array(dtype=wp.uint32), + # output + joint_pos: wp.array2d(dtype=wp.float32), + joint_vel: wp.array2d(dtype=wp.float32), + prev_targets: wp.array2d(dtype=wp.float32), + cur_targets: wp.array2d(dtype=wp.float32), + hand_dof_targets: wp.array2d(dtype=wp.float32), +): + env_id = wp.tid() + if env_mask[env_id]: + # Each env runs sequentially inside this kernel (avoids RNG races across DOFs). + for dof_id in range(num_dofs): + dof_pos_noise = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + + delta_max = upper_limits[env_id, dof_id] - default_joint_pos[env_id, dof_id] + delta_min = lower_limits[env_id, dof_id] - default_joint_pos[env_id, dof_id] + rand_delta = delta_min + (delta_max - delta_min) * 0.5 * dof_pos_noise + pos = default_joint_pos[env_id, dof_id] + reset_dof_pos_noise * rand_delta + + dof_vel_noise = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + vel = default_joint_vel[env_id, dof_id] + reset_dof_vel_noise * dof_vel_noise + + # The following lines should be equivalent to the following: + # self.hand.write_joint_state_to_sim(dof_pos, dof_vel, env_ids=env_ids) + joint_pos[env_id, dof_id] = pos + joint_vel[env_id, dof_id] = vel + + prev_targets[env_id, dof_id] = pos + cur_targets[env_id, dof_id] = pos + hand_dof_targets[env_id, dof_id] = pos + + +@wp.kernel +def reset_successes( + # input + env_mask: wp.array(dtype=wp.bool), + # output + successes: wp.array(dtype=wp.float32), +): + env_id = wp.tid() + if env_mask[env_id]: + successes[env_id] = wp.float32(0.0) + + +@wp.kernel +def compute_intermediate_values( + # input + body_pos_w: wp.array2d(dtype=wp.vec3f), + body_quat_w: wp.array2d(dtype=wp.quatf), + body_vel_w: wp.array2d(dtype=wp.spatial_vectorf), + finger_bodies: wp.array(dtype=wp.int32), + env_origins: wp.array(dtype=wp.vec3f), + object_root_pose_w: wp.array(dtype=wp.transformf), + object_root_vel_w: wp.array(dtype=wp.spatial_vectorf), + num_fingertips: wp.int32, + # output + fingertip_pos: wp.array2d(dtype=wp.vec3f), + fingertip_rot: wp.array2d(dtype=wp.quatf), + fingertip_velocities: wp.array2d(dtype=wp.spatial_vectorf), + object_pose: wp.array(dtype=wp.transformf), + object_vels: wp.array(dtype=wp.spatial_vectorf), +): + env_id = wp.tid() + + for i in range(num_fingertips): + body_id = finger_bodies[i] + fingertip_pos[env_id, i] = body_pos_w[env_id, body_id] - env_origins[env_id] + fingertip_rot[env_id, i] = body_quat_w[env_id, body_id] + fingertip_velocities[env_id, i] = body_vel_w[env_id, body_id] + + # Store object pose in env-local frame (translation only; orientation unchanged). + pos_w = wp.transform_get_translation(object_root_pose_w[env_id]) + pos = pos_w - env_origins[env_id] + rot = wp.transform_get_rotation(object_root_pose_w[env_id]) + object_pose[env_id] = wp.transform(pos, rot) + object_vels[env_id] = object_root_vel_w[env_id] + + +@wp.kernel +def get_dones( + # input + max_episode_length: wp.int32, + object_pose: wp.array(dtype=wp.transformf), + in_hand_pos: wp.array(dtype=wp.vec3f), + goal_rot: wp.array(dtype=wp.quatf), + fall_dist: wp.float32, + success_tolerance: wp.float32, + max_consecutive_success: wp.int32, + successes: wp.array(dtype=wp.float32), + # input/output + episode_length_buf: wp.array(dtype=wp.int32), + # output + out_of_reach: wp.array(dtype=wp.bool), + time_out: wp.array(dtype=wp.bool), + reset: wp.array(dtype=wp.bool), +): + env_id = wp.tid() + + object_pos = wp.transform_get_translation(object_pose[env_id]) + object_rot = wp.transform_get_rotation(object_pose[env_id]) + + goal_dist = wp.length(object_pos - in_hand_pos[env_id]) + out_of_reach[env_id] = goal_dist >= fall_dist + + max_success_reached = False + if max_consecutive_success > 0: + # Reset progress (episode length buf) on goal envs if max_consecutive_success > 0 + rot_dist = rotation_distance(object_rot, goal_rot[env_id]) + if wp.abs(rot_dist) <= success_tolerance: + episode_length_buf[env_id] = 0 + max_success_reached = successes[env_id] >= wp.float32(max_consecutive_success) + + time_out[env_id] = episode_length_buf[env_id] >= (max_episode_length - 1) or max_success_reached + reset[env_id] = out_of_reach[env_id] or time_out[env_id] + + +@wp.kernel +def compute_reduced_observations( + # input + fingertip_pos: wp.array2d(dtype=wp.vec3f), + object_pose: wp.array(dtype=wp.transformf), + goal_rot: wp.array(dtype=wp.quatf), + actions: wp.array2d(dtype=wp.float32), + num_fingertips: wp.int32, + action_dim: wp.int32, + # output + observations: wp.array2d(dtype=wp.float32), +): + env_id = wp.tid() + + obj_pos = wp.transform_get_translation(object_pose[env_id]) + obj_rot = wp.transform_get_rotation(object_pose[env_id]) + + idx = int(0) + for i in range(num_fingertips): + observations[env_id, idx + 0] = fingertip_pos[env_id, i][0] + observations[env_id, idx + 1] = fingertip_pos[env_id, i][1] + observations[env_id, idx + 2] = fingertip_pos[env_id, i][2] + idx += 3 + + observations[env_id, idx + 0] = obj_pos[0] + observations[env_id, idx + 1] = obj_pos[1] + observations[env_id, idx + 2] = obj_pos[2] + idx += 3 + + rel = obj_rot * wp.quat_inverse(goal_rot[env_id]) + observations[env_id, idx + 0] = rel[0] + observations[env_id, idx + 1] = rel[1] + observations[env_id, idx + 2] = rel[2] + observations[env_id, idx + 3] = rel[3] + idx += 4 + + for i in range(action_dim): + observations[env_id, idx + i] = actions[env_id, i] + + +@wp.kernel +def compute_full_observations( + # input + hand_dof_pos: wp.array2d(dtype=wp.float32), + hand_dof_vel: wp.array2d(dtype=wp.float32), + hand_dof_lower_limits: wp.array2d(dtype=wp.float32), + hand_dof_upper_limits: wp.array2d(dtype=wp.float32), + vel_obs_scale: wp.float32, + object_pose: wp.array(dtype=wp.transformf), + object_vels: wp.array(dtype=wp.spatial_vectorf), + in_hand_pos: wp.array(dtype=wp.vec3f), + goal_rot: wp.array(dtype=wp.quatf), + fingertip_pos: wp.array2d(dtype=wp.vec3f), + fingertip_rot: wp.array2d(dtype=wp.quatf), + fingertip_velocities: wp.array2d(dtype=wp.spatial_vectorf), + actions: wp.array2d(dtype=wp.float32), + num_hand_dofs: wp.int32, + num_fingertips: wp.int32, + action_dim: wp.int32, + # output + observations: wp.array2d(dtype=wp.float32), +): + env_id = wp.tid() + + # hand + for i in range(num_hand_dofs): + observations[env_id, i] = unscale( + hand_dof_pos[env_id, i], hand_dof_lower_limits[env_id, i], hand_dof_upper_limits[env_id, i] + ) + + offset = num_hand_dofs + for i in range(num_hand_dofs): + observations[env_id, offset + i] = vel_obs_scale * hand_dof_vel[env_id, i] + offset += num_hand_dofs + + # object + obj_pos = wp.transform_get_translation(object_pose[env_id]) + obj_rot = wp.transform_get_rotation(object_pose[env_id]) + + observations[env_id, offset + 0] = obj_pos[0] + observations[env_id, offset + 1] = obj_pos[1] + observations[env_id, offset + 2] = obj_pos[2] + offset += 3 + + observations[env_id, offset + 0] = obj_rot[0] + observations[env_id, offset + 1] = obj_rot[1] + observations[env_id, offset + 2] = obj_rot[2] + observations[env_id, offset + 3] = obj_rot[3] + offset += 4 + + # spatial_vectorf layout: [0:3]=angular, [3:6]=linear + # torch reference order: linear (unscaled) first, then angular (scaled) + observations[env_id, offset + 0] = object_vels[env_id][3] + observations[env_id, offset + 1] = object_vels[env_id][4] + observations[env_id, offset + 2] = object_vels[env_id][5] + offset += 3 + + observations[env_id, offset + 0] = vel_obs_scale * object_vels[env_id][0] + observations[env_id, offset + 1] = vel_obs_scale * object_vels[env_id][1] + observations[env_id, offset + 2] = vel_obs_scale * object_vels[env_id][2] + offset += 3 + + # goal + observations[env_id, offset + 0] = in_hand_pos[env_id][0] + observations[env_id, offset + 1] = in_hand_pos[env_id][1] + observations[env_id, offset + 2] = in_hand_pos[env_id][2] + offset += 3 + + observations[env_id, offset + 0] = goal_rot[env_id][0] + observations[env_id, offset + 1] = goal_rot[env_id][1] + observations[env_id, offset + 2] = goal_rot[env_id][2] + observations[env_id, offset + 3] = goal_rot[env_id][3] + offset += 4 + + rel = obj_rot * wp.quat_inverse(goal_rot[env_id]) + observations[env_id, offset + 0] = rel[0] + observations[env_id, offset + 1] = rel[1] + observations[env_id, offset + 2] = rel[2] + observations[env_id, offset + 3] = rel[3] + offset += 4 + + # fingertips + for i in range(num_fingertips): + observations[env_id, offset + 0] = fingertip_pos[env_id, i][0] + observations[env_id, offset + 1] = fingertip_pos[env_id, i][1] + observations[env_id, offset + 2] = fingertip_pos[env_id, i][2] + offset += 3 + + for i in range(num_fingertips): + observations[env_id, offset + 0] = fingertip_rot[env_id, i][0] + observations[env_id, offset + 1] = fingertip_rot[env_id, i][1] + observations[env_id, offset + 2] = fingertip_rot[env_id, i][2] + observations[env_id, offset + 3] = fingertip_rot[env_id, i][3] + offset += 4 + + for i in range(num_fingertips): + for j in range(6): + observations[env_id, offset + j] = fingertip_velocities[env_id, i][j] + offset += 6 + + # actions + for i in range(action_dim): + observations[env_id, offset + i] = actions[env_id, i] + + +@wp.kernel +def sanitize_and_print_once( + # input/output + obs: wp.array(dtype=wp.float32), + printed_flag: wp.array(dtype=wp.int32), +): + i = wp.tid() + v = obs[i] + + if not wp.isfinite(v): + # Try to claim the "print token" + if wp.atomic_cas(printed_flag, 0, 0, 1) == 0: + wp.printf("Non-finite values in observations") + + obs[i] = wp.float32(0.0) + + +@wp.kernel +def compute_rewards( + # input + reset_buf: wp.array(dtype=wp.bool), + object_pose: wp.array(dtype=wp.transformf), + target_pos: wp.array(dtype=wp.vec3f), + target_rot: wp.array(dtype=wp.quatf), + dist_reward_scale: wp.float32, + rot_reward_scale: wp.float32, + rot_eps: wp.float32, + actions: wp.array2d(dtype=wp.float32), + action_penalty_scale: wp.float32, + success_tolerance: wp.float32, + reach_goal_bonus: wp.float32, + fall_dist: wp.float32, + fall_penalty: wp.float32, + action_dim: wp.int32, + # input/output + reset_goal_buf: wp.array(dtype=wp.bool), + successes: wp.array(dtype=wp.float32), + num_resets_out: wp.array(dtype=wp.float32), + finished_cons_successes_out: wp.array(dtype=wp.float32), + # output + reward_out: wp.array(dtype=wp.float32), +): + env_id = wp.tid() + + obj_pos = wp.transform_get_translation(object_pose[env_id]) + obj_rot = wp.transform_get_rotation(object_pose[env_id]) + + goal_dist = wp.length(obj_pos - target_pos[env_id]) + rot_dist = rotation_distance(obj_rot, target_rot[env_id]) + + dist_rew = goal_dist * dist_reward_scale + rot_rew = wp.float32(1.0) / (wp.abs(rot_dist) + rot_eps) * rot_reward_scale + + action_penalty = wp.float32(0.0) + for i in range(action_dim): + action_penalty += actions[env_id, i] * actions[env_id, i] + + # Total reward is: position distance + orientation alignment + action regularization + success bonus + fall penalty + reward = dist_rew + rot_rew + action_penalty * action_penalty_scale + + # Find out which envs hit the goal and update successes count + reached = wp.abs(rot_dist) <= success_tolerance + goal_resets = reached or reset_goal_buf[env_id] + reset_goal_buf[env_id] = goal_resets + if goal_resets: + successes[env_id] = successes[env_id] + wp.float32(1.0) + + # Success bonus: orientation is within `success_tolerance` of goal orientation + if goal_resets: + reward = reward + reach_goal_bonus + + # Fall penalty: distance to the goal is larger than a threshold + if goal_dist >= fall_dist: + reward = reward + fall_penalty + + # Consecutive-successes stats (mirrors Torch env): + # resets = torch.where(goal_dist >= fall_dist, ones_like(reset_buf), reset_buf) + resets = (goal_dist >= fall_dist) or reset_buf[env_id] + if resets: + wp.atomic_add(num_resets_out, 0, wp.float32(1.0)) + wp.atomic_add(finished_cons_successes_out, 0, successes[env_id]) + + reward_out[env_id] = reward + + +@wp.kernel +def update_consecutive_successes_from_stats( + # input + num_resets: wp.array(dtype=wp.float32), + finished_cons_successes: wp.array(dtype=wp.float32), + av_factor: wp.float32, + # input/output + consecutive_successes: wp.array(dtype=wp.float32), +): + """Finalize the Torch env's EMA update for consecutive_successes and clear the accumulators.""" + # single-thread kernel (dim=1) + n = num_resets[0] + prev = consecutive_successes[0] + if n > wp.float32(0.0): + consecutive_successes[0] = av_factor * (finished_cons_successes[0] / n) + (wp.float32(1.0) - av_factor) * prev + + +@wp.func +def scale(x: wp.float32, lower: wp.float32, upper: wp.float32) -> wp.float32: + return wp.float32(0.5) * (x + wp.float32(1.0)) * (upper - lower) + lower + + +@wp.func +def unscale(x: wp.float32, lower: wp.float32, upper: wp.float32) -> wp.float32: + return (wp.float32(2.0) * x - upper - lower) / (upper - lower) + + +@wp.func +def randomize_rotation(rand0: wp.float32, rand1: wp.float32, x_axis: wp.vec3f, y_axis: wp.vec3f) -> wp.quatf: + return wp.quat_from_axis_angle(x_axis, rand0 * wp.pi) * wp.quat_from_axis_angle(y_axis, rand1 * wp.pi) + + +@wp.func +def rotation_distance(object_rot: wp.quatf, target_rot: wp.quatf) -> wp.float32: + # Orientation alignment for the cube in hand and goal cube + quat_diff = object_rot * wp.quat_inverse(target_rot) + # Match Torch env convention: uses indices [1:4] for the vector part (see `rotation_distance` in Torch env). + v_norm = wp.sqrt(quat_diff[1] * quat_diff[1] + quat_diff[2] * quat_diff[2] + quat_diff[3] * quat_diff[3]) + v_norm = wp.min(v_norm, wp.float32(1.0)) + return wp.float32(2.0) * wp.asin(v_norm) + + +class InHandManipulationWarpEnv(DirectRLEnvWarp): + cfg: AllegroHandWarpEnvCfg # | ShadowHandWarpEnvCfg + + # def __init__(self, cfg: AllegroHandWarpEnvCfg | ShadowHandWarpEnvCfg, render_mode: str | None = None, **kwargs): + def __init__(self, cfg: AllegroHandWarpEnvCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) + + # --------------------------------------------------------------------- + # Constants + # --------------------------------------------------------------------- + + # dof used for joint related init and sample + self.num_hand_dofs = self.hand.num_joints + + # list of actuated joints + actuated_dof_indices: list[int] = list() + for joint_name in cfg.actuated_joint_names: + actuated_dof_indices.append(self.hand.joint_names.index(joint_name)) + actuated_dof_indices.sort() + self.num_actuated_dofs = len(actuated_dof_indices) + + # Warp index/mask helpers for kernels and articulation APIs. + self.actuated_dof_indices = wp.array(actuated_dof_indices, dtype=wp.int32, device=self.device) + actuated_mask = [False] * self.num_hand_dofs + for idx in actuated_dof_indices: + actuated_mask[idx] = True + self.actuated_dof_mask = wp.array(actuated_mask, dtype=wp.bool, device=self.device) + + # finger bodies + finger_bodies: list[int] = list() + for body_name in self.cfg.fingertip_body_names: + finger_bodies.append(self.hand.body_names.index(body_name)) + finger_bodies.sort() + self.num_fingertips = len(finger_bodies) + self.finger_bodies = wp.array(finger_bodies, dtype=wp.int32, device=self.device) + + # joint limits + self.hand_dof_lower_limits = self.hand.data.joint_pos_limits_lower + self.hand_dof_upper_limits = self.hand.data.joint_pos_limits_upper + + # unit vectors + self.x_unit_vec = wp.vec3f(1.0, 0.0, 0.0) + self.y_unit_vec = wp.vec3f(0.0, 1.0, 0.0) + self.z_unit_vec = wp.vec3f(0.0, 0.0, 1.0) + + # Per-env origins (Warp view for kernels; Torch env uses `self.scene.env_origins` directly). + self.env_origins = wp.from_torch(self.scene.env_origins, dtype=wp.vec3f) + + # --------------------------------------------------------------------- + # Warp buffers + # --------------------------------------------------------------------- + + # buffers for position targets + self.hand_dof_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device) + self.prev_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device) + self.cur_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device) + + # track goal resets + self.reset_goal_buf = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device) + # used to compare object position + self.in_hand_pos = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device) + # default goal positions + self.goal_rot = wp.zeros(self.num_envs, dtype=wp.quatf, device=self.device) + self.goal_pos = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device) + self.goal_pos_w = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device) + + # Initialize goal constants from Torch (avoid a one-off kernel launch). + default_root_pose = wp.to_torch(self.object.data.default_root_pose).to(self.device) + in_hand_pos = default_root_pose[:, 0:3].clone() + in_hand_pos[:, 2] -= 0.04 + self.in_hand_pos.assign(wp.from_torch(in_hand_pos, dtype=wp.vec3f)) + + goal_pos = torch.tensor([-0.2, -0.45, 0.68], device=self.device, dtype=torch.float32).repeat((self.num_envs, 1)) + self.goal_pos.assign(wp.from_torch(goal_pos, dtype=wp.vec3f)) + + goal_rot = torch.zeros((self.num_envs, 4), device=self.device, dtype=torch.float32) + goal_rot[:, 3] = 1.0 # (x, y, z, w) + self.goal_rot.assign(wp.from_torch(goal_rot, dtype=wp.quatf)) + + # initialize goal marker + self.goal_markers = VisualizationMarkers(self.cfg.goal_object_cfg) + + # Reduction buffers for consecutive_successes update (Warp-only). + self._num_resets = wp.zeros(1, dtype=wp.float32, device=self.device) + self._finished_cons_successes = wp.zeros(1, dtype=wp.float32, device=self.device) + # track successes + self.successes = wp.zeros(self.num_envs, dtype=wp.float32, device=self.device) + self.consecutive_successes = wp.zeros(1, dtype=wp.float32, device=self.device) + + # Persistent RL buffers (Warp). + self.actions = wp.zeros((self.num_envs, self.cfg.action_space), dtype=wp.float32, device=self.device) + self.observations = wp.zeros((self.num_envs, self.cfg.observation_space), dtype=wp.float32, device=self.device) + self.rewards = wp.zeros((self.num_envs,), dtype=wp.float32, device=self.device) + # Flag used as a print token for non-finite observations (Warp). + self.obs_nonfinite_flag = wp.zeros(1, dtype=wp.int32, device=self.device) + + # Intermediate values (Warp) -- mirrors the Torch env's `_compute_intermediate_values` fields. + self.fingertip_pos = wp.zeros((self.num_envs, self.num_fingertips), dtype=wp.vec3f, device=self.device) + self.fingertip_rot = wp.zeros((self.num_envs, self.num_fingertips), dtype=wp.quatf, device=self.device) + self.fingertip_velocities = wp.zeros( + (self.num_envs, self.num_fingertips), dtype=wp.spatial_vectorf, device=self.device + ) + + self.object_pose = wp.zeros(self.num_envs, dtype=wp.transformf, device=self.device) + self.object_vels = wp.zeros(self.num_envs, dtype=wp.spatial_vectorf, device=self.device) + + # RNG state (per-env) for randomizations in reset/goal resets. + self.rng_state = wp.zeros(self.num_envs, dtype=wp.uint32, device=self.device) + if self.cfg.seed is None: + self.cfg.seed = -1 + wp.launch( + initialize_rng_state, + dim=self.num_envs, + inputs=[ + self.cfg.seed, + self.rng_state, + ], + device=self.device, + ) + + # --------------------------------------------------------------------- + # Torch views / aliases + # --------------------------------------------------------------------- + + # Bind torch buffers to warp buffers (same pattern as Warp Cartpole). + self.torch_obs_buf = wp.to_torch(self.observations) + self.torch_reward_buf = wp.to_torch(self.rewards) + self.torch_reset_terminated = wp.to_torch(self.reset_terminated) + self.torch_reset_time_outs = wp.to_torch(self.reset_time_outs) + self.torch_episode_length_buf = self.episode_length_buf # already a torch tensor via wp.to_torch + + def _setup_scene(self): + # add hand, in-hand object, and goal object + self.hand = Articulation(self.cfg.robot_cfg) + self.object = Articulation(self.cfg.object_cfg) + # add ground plane + spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg()) + # clone and replicate (no need to filter for this environment) + self.scene.clone_environments(copy_from_source=False) + # add articulation to scene - we must register to scene to randomize with EventManager + self.scene.articulations["robot"] = self.hand + self.scene.articulations["object"] = self.object + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _pre_physics_step(self, actions: wp.array) -> None: + # Store actions in a persistent Warp buffer (analogous to `actions.clone()` in the Torch env). + wp.copy(self.actions, actions) + + def _apply_action(self) -> None: + wp.launch( + apply_actions_to_targets, + dim=(self.num_envs, self.num_actuated_dofs), + inputs=[ + self.actions, + self.hand_dof_lower_limits, + self.hand_dof_upper_limits, + self.actuated_dof_indices, + self.cfg.act_moving_average, + self.prev_targets, + self.cur_targets, + ], + device=self.device, + ) + + # Apply position targets using mask method (CUDA graph safe). + # All joints are actuated for Allegro, so default masks (None = all) are correct. + self.hand.set_joint_position_target_mask(target=self.cur_targets) + + def _get_observations(self) -> dict: + # if self.cfg.asymmetric_obs: + # self.fingertip_force_sensors = self.hand.root_physx_view.get_link_incoming_joint_force()[ + # :, self.finger_bodies + # ] + if self.cfg.obs_type == "openai": + self.compute_reduced_observations() + elif self.cfg.obs_type == "full": + self.compute_full_observations() + else: + raise ValueError(f"Unknown obs_type: {self.cfg.obs_type}") + return {"policy": self.torch_obs_buf} + + def _get_rewards(self) -> None: + # Clear reduction buffers before launching the reward kernel. + # wp.assign(self._num_resets, 0.0) + # wp.assign(self._finished_cons_successes, 0.0) + self._num_resets.zero_() + self._finished_cons_successes.zero_() + wp.launch( + compute_rewards, + dim=self.num_envs, + inputs=[ + self.reset_buf, + self.object_pose, + self.in_hand_pos, + self.goal_rot, + self.cfg.dist_reward_scale, + self.cfg.rot_reward_scale, + self.cfg.rot_eps, + self.actions, + self.cfg.action_penalty_scale, + self.cfg.success_tolerance, + self.cfg.reach_goal_bonus, + self.cfg.fall_dist, + self.cfg.fall_penalty, + self.cfg.action_space, + self.reset_goal_buf, + self.successes, + self._num_resets, + self._finished_cons_successes, + self.rewards, + ], + device=self.device, + ) + + # A separate kernel is needed as Warp does not support thread synchronization for reductions. + wp.launch( + update_consecutive_successes_from_stats, + dim=1, + inputs=[ + self._num_resets, + self._finished_cons_successes, + self.cfg.av_factor, + self.consecutive_successes, + ], + device=self.device, + ) + + if "log" not in self.extras: + self.extras["log"] = dict() + # .mean() cannot be called here as it causes problems on stream + self.extras["log"]["consecutive_successes"] = wp.to_torch(self.consecutive_successes) + + # Reset goals for envs that reached the target (mask is `reset_goal_buf`). + # This avoids Torch-side index extraction and keeps the step graphable. + self._reset_target_pose(mask=self.reset_goal_buf) + + def _get_dones(self) -> None: + self._compute_intermediate_values() + + wp.launch( + get_dones, + dim=self.num_envs, + inputs=[ + self.max_episode_length, + self.object_pose, + self.in_hand_pos, + self.goal_rot, + self.cfg.fall_dist, + self.cfg.success_tolerance, + self.cfg.max_consecutive_success, + self.successes, + self._episode_length_buf_wp, + self.reset_terminated, + self.reset_time_outs, + self.reset_buf, + ], + device=self.device, + ) + + def _reset_idx(self, mask: wp.array | None = None): + if mask is None: + mask = self._ALL_ENV_MASK + + # resets articulation and rigid body attributes + super()._reset_idx(mask) + + # reset goals + self._reset_target_pose(mask=mask) + + # reset object + wp.launch( + reset_object, + dim=self.num_envs, + inputs=[ + self.object.data.default_root_pose, + self.env_origins, + self.cfg.reset_position_noise, + self.x_unit_vec, + self.y_unit_vec, + mask, + self.rng_state, + self.object.data.root_link_pose_w, + self.object.data.root_com_vel_w, + ], + device=self.device, + ) + + # reset hand + wp.launch( + reset_hand, + dim=self.num_envs, + inputs=[ + self.hand.data.default_joint_pos, + self.hand.data.default_joint_vel, + self.hand_dof_lower_limits, + self.hand_dof_upper_limits, + self.cfg.reset_dof_pos_noise, + self.cfg.reset_dof_vel_noise, + mask, + self.num_hand_dofs, + self.rng_state, + self.hand.data.joint_pos, + self.hand.data.joint_vel, + self.prev_targets, + self.cur_targets, + self.hand_dof_targets, + ], + device=self.device, + ) + + self.hand.set_joint_position_target_mask(target=self.cur_targets, env_mask=mask) + + wp.launch( + reset_successes, + dim=self.num_envs, + inputs=[ + mask, + self.successes, + ], + device=self.device, + ) + + self._compute_intermediate_values() + + def _reset_target_pose(self, env_ids: Sequence[int] | None = None, mask: wp.array | None = None): + # reset goal rotation + if mask is None: + if env_ids is None: + return + env_mask_list = [False] * self.num_envs + for env_id in env_ids: + env_mask_list[int(env_id)] = True + mask = wp.array(env_mask_list, dtype=wp.bool, device=self.device) + + # update goal pose and markers + wp.launch( + reset_target_pose, + dim=self.num_envs, + inputs=[ + mask, + self.x_unit_vec, + self.y_unit_vec, + self.env_origins, + self.goal_pos, + self.rng_state, + self.goal_rot, + self.reset_goal_buf, + self.goal_pos_w, + ], + device=self.device, + ) + + def _post_step_visualize(self) -> None: + """Update goal markers outside CUDA graph scope.""" + self.goal_markers.visualize(wp.to_torch(self.goal_pos_w), wp.to_torch(self.goal_rot)) + + def _compute_intermediate_values(self): + # data for hand/object (Warp version of the Torch env's `_compute_intermediate_values`) + wp.launch( + compute_intermediate_values, + dim=self.num_envs, + inputs=[ + self.hand.data.body_pos_w, + self.hand.data.body_quat_w, + self.hand.data.body_vel_w, + self.finger_bodies, + self.env_origins, + self.object.data.root_link_pose_w, + self.object.data.root_com_vel_w, + self.num_fingertips, + self.fingertip_pos, + self.fingertip_rot, + self.fingertip_velocities, + self.object_pose, + self.object_vels, + ], + device=self.device, + ) + + def compute_reduced_observations(self): + # Per https://arxiv.org/pdf/1808.00177.pdf Table 2 + # Fingertip positions + # Object Position, but not orientation + # Relative target orientation + wp.launch( + compute_reduced_observations, + dim=self.num_envs, + inputs=[ + self.fingertip_pos, + self.object_pose, + self.goal_rot, + self.actions, + self.num_fingertips, + self.cfg.action_space, + self.observations, + ], + device=self.device, + ) + # Warp-native non-finite sanitization + print-once. + wp.launch( + sanitize_and_print_once, + dim=(self.num_envs * self.cfg.observation_space), + inputs=[self.observations.flatten(), self.obs_nonfinite_flag], + device=self.device, + ) + self.obs_nonfinite_flag.zero_() + + def compute_full_observations(self): + wp.launch( + compute_full_observations, + dim=self.num_envs, + inputs=[ + self.hand.data.joint_pos, + self.hand.data.joint_vel, + self.hand_dof_lower_limits, + self.hand_dof_upper_limits, + self.cfg.vel_obs_scale, + self.object_pose, + self.object_vels, + self.in_hand_pos, + self.goal_rot, + self.fingertip_pos, + self.fingertip_rot, + self.fingertip_velocities, + self.actions, + self.num_hand_dofs, + self.num_fingertips, + self.cfg.action_space, + self.observations, + ], + device=self.device, + ) + # Warp-native non-finite sanitization + print-once. + wp.launch( + sanitize_and_print_once, + dim=(self.num_envs * self.cfg.observation_space), + inputs=[self.observations.flatten(), self.obs_nonfinite_flag], + device=self.device, + ) + self.obs_nonfinite_flag.zero_() diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/__init__.py new file mode 100644 index 000000000000..460a30569089 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/locomotion_env_warp.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/locomotion_env_warp.py new file mode 100644 index 000000000000..6a726eb035ef --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/locomotion_env_warp.py @@ -0,0 +1,575 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import warp as wp +from isaaclab_experimental.envs import DirectRLEnvWarp + +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation +from isaaclab.envs import DirectRLEnvCfg + + +@wp.func +def safe_normalize(v: wp.vec3f) -> wp.vec3f: + """Normalize a vector with epsilon to avoid NaN on zero-length vectors.""" + length = wp.max(wp.length(v), 1e-9) + return v / length + + +@wp.func +def fmod(x: wp.float32, y: wp.float32) -> wp.float32: + return x - y * wp.floor(x / y) + + +@wp.func +def euler_from_quat(q: wp.quatf) -> wp.vec3f: + sinr_cosp = 2.0 * (q[3] * q[0] + q[1] * q[2]) + cosr_cosp = q[3] * q[3] - q[0] * q[0] - q[1] * q[1] + q[2] * q[2] + sinp = 2.0 * (q[3] * q[1] - q[2] * q[0]) + siny_cosp = 2.0 * (q[3] * q[2] + q[0] * q[1]) + cosy_cosp = q[3] * q[3] + q[0] * q[0] - q[1] * q[1] - q[2] * q[2] + roll = wp.atan2(sinr_cosp, cosr_cosp) + if wp.abs(sinp) >= 1: + pitch = wp.sign(sinp) * wp.pi / 2.0 + else: + pitch = wp.asin(sinp) + yaw = wp.atan2(siny_cosp, cosy_cosp) + + return wp.vec3f( + fmod(roll, wp.static(2.0 * wp.pi)), + fmod(pitch, wp.static(2.0 * wp.pi)), + fmod(yaw, wp.static(2.0 * wp.pi)), + ) + + +@wp.kernel +def get_dones( + episode_length_buf: wp.array(dtype=wp.int32), + torso_pose: wp.array(dtype=wp.transformf), + max_episode_length: wp.int32, + termination_height: wp.float32, + out_of_bounds: wp.array(dtype=wp.bool), + time_out: wp.array(dtype=wp.bool), + reset: wp.array(dtype=wp.bool), +): + env_index = wp.tid() + out_of_bounds[env_index] = wp.abs(torso_pose[env_index][2]) < termination_height + time_out[env_index] = episode_length_buf[env_index] >= (max_episode_length - 1) + reset[env_index] = out_of_bounds[env_index] or time_out[env_index] + + +@wp.kernel +def observations( + torso_pose: wp.array(dtype=wp.transformf), + velocity: wp.array(dtype=wp.spatial_vectorf), + rpy: wp.array(dtype=wp.vec3f), + angle_to_target: wp.array(dtype=wp.float32), + up_proj: wp.array(dtype=wp.float32), + heading_proj: wp.array(dtype=wp.float32), + dof_pos_scaled: wp.array2d(dtype=wp.float32), + dof_vel: wp.array2d(dtype=wp.float32), + actions: wp.array2d(dtype=wp.float32), + observations: wp.array2d(dtype=wp.float32), + dof_vel_scale: wp.float32, + angular_velocity_scale: wp.float32, + num_dof: wp.int32, +): + env_index = wp.tid() + observations[env_index, 0] = torso_pose[env_index][2] + observations[env_index, 1] = velocity[env_index][0] + observations[env_index, 2] = velocity[env_index][1] + observations[env_index, 3] = velocity[env_index][2] + observations[env_index, 4] = velocity[env_index][3] * angular_velocity_scale + observations[env_index, 5] = velocity[env_index][4] * angular_velocity_scale + observations[env_index, 6] = velocity[env_index][5] * angular_velocity_scale + observations[env_index, 7] = wp.atan2(wp.sin(rpy[env_index][2]), wp.cos(rpy[env_index][2])) + observations[env_index, 8] = wp.atan2(wp.sin(rpy[env_index][0]), wp.cos(rpy[env_index][0])) + observations[env_index, 9] = wp.atan2(wp.sin(angle_to_target[env_index]), wp.cos(angle_to_target[env_index])) + observations[env_index, 10] = up_proj[env_index] + observations[env_index, 11] = heading_proj[env_index] + + offset_1 = 12 + num_dof + offset_2 = offset_1 + num_dof + + for i in range(num_dof): + observations[env_index, 12 + i] = dof_pos_scaled[env_index, i] + for i in range(num_dof): + observations[env_index, offset_1 + i] = dof_vel[env_index, i] * dof_vel_scale + for i in range(num_dof): + observations[env_index, offset_2 + i] = actions[env_index, i] + + +@wp.func +def translate_transform( + transform: wp.transformf, + translation: wp.vec3f, +) -> wp.transformf: + return wp.transform( + wp.transform_get_translation(transform) + translation, + wp.transform_get_rotation(transform), + ) + + +@wp.kernel +def reset_root( + default_root_pose: wp.array(dtype=wp.transformf), + default_root_vel: wp.array(dtype=wp.spatial_vectorf), + env_origins: wp.array(dtype=wp.vec3f), + dt: wp.float32, + targets: wp.array(dtype=wp.vec3f), + to_targets: wp.array(dtype=wp.vec3f), + potentials: wp.array(dtype=wp.float32), + root_pose: wp.array(dtype=wp.transformf), + root_vel: wp.array(dtype=wp.spatial_vectorf), + env_mask: wp.array(dtype=wp.bool), +): + env_index = wp.tid() + if env_mask[env_index]: + root_pose[env_index] = default_root_pose[env_index] + root_pose[env_index] = translate_transform(root_pose[env_index], env_origins[env_index]) + root_vel[env_index] = default_root_vel[env_index] + to_targets[env_index] = targets[env_index] - wp.transform_get_translation(root_pose[env_index]) + to_targets[env_index][2] = 0.0 + potentials[env_index] = -wp.length(to_targets[env_index]) / dt + + +@wp.kernel +def reset_joints( + default_joint_pos: wp.array2d(dtype=wp.float32), + default_joint_vel: wp.array2d(dtype=wp.float32), + joint_pos: wp.array2d(dtype=wp.float32), + joint_vel: wp.array2d(dtype=wp.float32), + env_mask: wp.array(dtype=wp.bool), +): + env_index, joint_index = wp.tid() + if env_mask[env_index]: + joint_pos[env_index, joint_index] = default_joint_pos[env_index, joint_index] + joint_vel[env_index, joint_index] = default_joint_vel[env_index, joint_index] + + +@wp.func +def heading_reward( + heading_proj: wp.float32, + heading_weight: wp.float32, +) -> wp.float32: + if heading_proj > 0.8: + return heading_weight + else: + return heading_weight * heading_proj / 0.8 + + +@wp.func +def up_reward( + up_proj: wp.float32, + up_weight: wp.float32, +) -> wp.float32: + if up_proj > 0.93: + return up_weight + else: + return 0.0 + + +@wp.func +def progress_reward( + current_value: wp.float32, + prev_value: wp.float32, +) -> wp.float32: + return current_value - prev_value + + +@wp.func +def actions_cost( + actions: wp.array(dtype=wp.float32), +) -> wp.float32: + sum_ = wp.float32(0.0) + for i in range(len(actions)): + sum_ += actions[i] * actions[i] + return sum_ + + +@wp.func +def electricity_cost( + actions: wp.array(dtype=wp.float32), + dof_vel: wp.array(dtype=wp.float32), + dof_vel_scale: wp.float32, + motor_effort_ratio: wp.array(dtype=wp.float32), +) -> wp.float32: + sum_ = wp.float32(0.0) + for i in range(len(actions)): + sum_ += wp.abs(actions[i] * dof_vel[i] * dof_vel_scale) * motor_effort_ratio[i] + return sum_ + + +@wp.func +def dof_at_limit_cost( + dof_pos_scaled: wp.array(dtype=wp.float32), +) -> wp.float32: + sum_ = wp.float32(0.0) + for i in range(len(dof_pos_scaled)): + if dof_pos_scaled[i] > 0.98: + sum_ += 1.0 + return sum_ + + +@wp.kernel +def compute_rewards( + actions: wp.array2d(dtype=wp.float32), + dof_vel: wp.array2d(dtype=wp.float32), + dof_pos_scaled: wp.array2d(dtype=wp.float32), + reset_terminated: wp.array(dtype=wp.bool), + heading_proj: wp.array(dtype=wp.float32), + up_proj: wp.array(dtype=wp.float32), + potentials: wp.array(dtype=wp.float32), + prev_potentials: wp.array(dtype=wp.float32), + motor_effort_ratio: wp.array(dtype=wp.float32), + up_weight: wp.float32, + heading_weight: wp.float32, + actions_cost_scale: wp.float32, + energy_cost_scale: wp.float32, + dof_vel_scale: wp.float32, + death_cost: wp.float32, + alive_reward_scale: wp.float32, + reward: wp.array(dtype=wp.float32), +): + env_index = wp.tid() + if reset_terminated[env_index]: + reward[env_index] = death_cost + else: + reward[env_index] = ( + progress_reward(potentials[env_index], prev_potentials[env_index]) + + alive_reward_scale + + up_reward(up_proj[env_index], up_weight) + + heading_reward(heading_proj[env_index], heading_weight) + - actions_cost_scale * actions_cost(actions[env_index]) + - energy_cost_scale + * electricity_cost(actions[env_index], dof_vel[env_index], dof_vel_scale, motor_effort_ratio) + - dof_at_limit_cost(dof_pos_scaled[env_index]) + ) + + +@wp.kernel +def compute_heading_and_up( + torso_pose: wp.array(dtype=wp.transformf), + targets: wp.array(dtype=wp.vec3f), + dt: wp.float32, + to_targets: wp.array(dtype=wp.vec3f), + up_proj: wp.array(dtype=wp.float32), + heading_proj: wp.array(dtype=wp.float32), + up_vec: wp.array(dtype=wp.vec3f), + heading_vec: wp.array(dtype=wp.vec3f), + potentials: wp.array(dtype=wp.float32), + prev_potentials: wp.array(dtype=wp.float32), +): + env_index = wp.tid() + up_vec[env_index] = wp.quat_rotate(wp.transform_get_rotation(torso_pose[env_index]), wp.static(wp.vec3f(0, 0, 1))) + heading_vec[env_index] = wp.quat_rotate( + wp.transform_get_rotation(torso_pose[env_index]), wp.static(wp.vec3f(1, 0, 0)) + ) + up_proj[env_index] = up_vec[env_index][2] + to_targets[env_index] = targets[env_index] - wp.transform_get_translation(torso_pose[env_index]) + to_targets[env_index][2] = 0.0 + heading_proj[env_index] = wp.dot(heading_vec[env_index], safe_normalize(to_targets[env_index])) + prev_potentials[env_index] = potentials[env_index] + potentials[env_index] = -wp.length(to_targets[env_index]) / dt + + +@wp.func +def spatial_rotate_inv(quat: wp.quatf, vec: wp.spatial_vectorf) -> wp.spatial_vectorf: + return wp.spatial_vector( + wp.quat_rotate_inv(quat, wp.spatial_top(vec)), + wp.quat_rotate_inv(quat, wp.spatial_bottom(vec)), + ) + + +@wp.func +def unscale(x: wp.float32, lower: wp.float32, upper: wp.float32) -> wp.float32: + return (2.0 * x - upper - lower) / (upper - lower) + + +@wp.kernel +def compute_rot( + torso_pose: wp.array(dtype=wp.transformf), + velocity: wp.array(dtype=wp.spatial_vectorf), + targets: wp.array(dtype=wp.vec3f), + vec_loc: wp.array(dtype=wp.spatial_vectorf), + rpy: wp.array(dtype=wp.vec3f), + angle_to_target: wp.array(dtype=wp.float32), +): + env_index = wp.tid() + vec_loc[env_index] = spatial_rotate_inv(wp.transform_get_rotation(torso_pose[env_index]), velocity[env_index]) + rpy[env_index] = euler_from_quat(wp.transform_get_rotation(torso_pose[env_index])) + angle_to_target[env_index] = ( + wp.atan2(targets[env_index][1] - torso_pose[env_index][1], targets[env_index][0] - torso_pose[env_index][0]) + - rpy[env_index][2] + ) + + +@wp.kernel +def scale_dof_pos( + dof_pos: wp.array2d(dtype=wp.float32), + dof_limits: wp.array2d(dtype=wp.vec2f), + dof_pos_scaled: wp.array2d(dtype=wp.float32), +): + env_index, joint_index = wp.tid() + dof_pos_scaled[env_index, joint_index] = unscale( + dof_pos[env_index, joint_index], dof_limits[env_index, joint_index][0], dof_limits[env_index, joint_index][1] + ) + + +@wp.kernel +def update_actions( + input_actions: wp.array2d(dtype=wp.float32), + actions: wp.array2d(dtype=wp.float32), + joint_gears: wp.array(dtype=wp.float32), + action_scale: wp.float32, +): + env_index, joint_index = wp.tid() + actions[env_index, joint_index] = ( + action_scale * joint_gears[joint_index] * wp.clamp(input_actions[env_index, joint_index], -1.0, 1.0) + ) + + +@wp.kernel +def initialize_state( + env_origins: wp.array(dtype=wp.vec3f), + targets: wp.array(dtype=wp.vec3f), + state: wp.array(dtype=wp.uint32), + seed: wp.int32, +): + env_index = wp.tid() + state[env_index] = wp.rand_init(seed, env_index) + targets[env_index] = env_origins[env_index] + targets[env_index] += wp.static(wp.vec3f(1000.0, 0.0, 0.0)) + + +class LocomotionWarpEnv(DirectRLEnvWarp): + cfg: DirectRLEnvCfg + + def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) + + self.action_scale = self.cfg.action_scale + self.joint_gears = wp.array(self.cfg.joint_gears, dtype=wp.float32, device=self.sim.device) + self.motor_effort_ratio = wp.ones_like(self.joint_gears, device=self.sim.device) + self._joint_dof_idx, _ = self.robot.find_joints(".*") + + # Simulation bindings + # Note: these are direct memory views into the Newton simulation data, they should not be modified directly + self.joint_pos = self.robot.data.joint_pos + self.joint_vel = self.robot.data.joint_vel + self.root_pose_w = self.robot.data.root_pose_w + self.root_vel_w = self.robot.data.root_vel_w + self.soft_joint_pos_limits = self.robot.data.soft_joint_pos_limits + + # Buffers + self.observations = wp.zeros( + (self.num_envs, self.cfg.observation_space), dtype=wp.float32, device=self.sim.device + ) + self.rewards = wp.zeros((self.num_envs), dtype=wp.float32, device=self.sim.device) + self.actions = wp.zeros((self.num_envs, self.robot.num_joints), dtype=wp.float32, device=self.sim.device) + self.states = wp.zeros((self.num_envs), dtype=wp.uint32, device=self.sim.device) + self.potentials = wp.zeros(self.num_envs, dtype=wp.float32, device=self.sim.device) + self.prev_potentials = wp.zeros_like(self.potentials) + self.targets = wp.zeros((self.num_envs), dtype=wp.vec3f, device=self.sim.device) + self.up_vec = wp.zeros((self.num_envs), dtype=wp.vec3f, device=self.sim.device) + self.heading_vec = wp.zeros((self.num_envs), dtype=wp.vec3f, device=self.sim.device) + self.to_targets = wp.zeros((self.num_envs), dtype=wp.vec3f, device=self.sim.device) + self.up_proj = wp.zeros((self.num_envs), dtype=wp.float32, device=self.sim.device) + self.heading_proj = wp.zeros((self.num_envs), dtype=wp.float32, device=self.sim.device) + self.vec_loc = wp.zeros((self.num_envs), dtype=wp.spatial_vectorf, device=self.sim.device) + self.rpy = wp.zeros((self.num_envs), dtype=wp.vec3f, device=self.sim.device) + self.angle_to_target = wp.zeros((self.num_envs), dtype=wp.float32, device=self.sim.device) + self.dof_pos_scaled = wp.zeros((self.num_envs, self.robot.num_joints), dtype=wp.float32, device=self.sim.device) + self.env_origins = wp.from_torch(self.scene.env_origins, dtype=wp.vec3f) + self.actions_mapped = wp.zeros((self.num_envs, self.robot.num_joints), dtype=wp.float32, device=self.sim.device) + + # Initial states and targets + if self.cfg.seed is None: + self.cfg.seed = -1 + + wp.launch( + initialize_state, + dim=self.num_envs, + inputs=[ + self.env_origins, + self.targets, + self.states, + self.cfg.seed, + ], + ) + + # Bind torch buffers to warp buffers + self.torch_obs_buf = wp.to_torch(self.observations) + self.torch_reward_buf = wp.to_torch(self.rewards) + self.torch_reset_terminated = wp.to_torch(self.reset_terminated) + self.torch_reset_time_outs = wp.to_torch(self.reset_time_outs) + self.torch_episode_length_buf = self.episode_length_buf # already a torch tensor via wp.to_torch + + def _setup_scene(self) -> None: + self.robot = Articulation(self.cfg.robot) + # add ground plane + self.cfg.terrain.num_envs = self.scene.cfg.num_envs + self.cfg.terrain.env_spacing = self.scene.cfg.env_spacing + self.terrain = self.cfg.terrain.class_type(self.cfg.terrain) + # clone and replicate + self.scene.clone_environments(copy_from_source=False) + # add articulation to scene + self.scene.articulations["robot"] = self.robot + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _pre_physics_step(self, actions: wp.array) -> None: + self.actions.assign(actions) + wp.launch( + update_actions, + dim=(self.num_envs, self.robot.num_joints), + inputs=[actions, self.actions_mapped, self.joint_gears, self.action_scale], + ) + + def _apply_action(self) -> None: + self.robot.set_joint_effort_target_mask(target=self.actions_mapped) + + def _compute_intermediate_values(self) -> None: + wp.launch( + compute_heading_and_up, + dim=self.num_envs, + inputs=[ + self.root_pose_w, + self.targets, + self.cfg.sim.dt, + self.to_targets, + self.up_proj, + self.heading_proj, + self.up_vec, + self.heading_vec, + self.potentials, + self.prev_potentials, + ], + ) + + wp.launch( + compute_rot, + dim=self.num_envs, + inputs=[ + self.root_pose_w, + self.root_vel_w, + self.targets, + self.vec_loc, + self.rpy, + self.angle_to_target, + ], + ) + wp.launch( + scale_dof_pos, + dim=(self.num_envs, self.robot.num_joints), + inputs=[ + self.joint_pos, + self.soft_joint_pos_limits, + self.dof_pos_scaled, + ], + ) + + def _get_observations(self) -> dict: + wp.launch( + observations, + dim=self.num_envs, + inputs=[ + self.root_pose_w, + self.vec_loc, + self.rpy, + self.angle_to_target, + self.up_proj, + self.heading_proj, + self.dof_pos_scaled, + self.joint_vel, + self.actions, + self.observations, + self.cfg.dof_vel_scale, + self.cfg.angular_velocity_scale, + self.robot.num_joints, + ], + ) + return {"policy": self.torch_obs_buf} + + def _get_rewards(self) -> None: + wp.launch( + compute_rewards, + dim=self.num_envs, + inputs=[ + self.actions, + self.joint_vel, + self.dof_pos_scaled, + self.reset_terminated, + self.heading_proj, + self.up_proj, + self.potentials, + self.prev_potentials, + self.motor_effort_ratio, + self.cfg.up_weight, + self.cfg.heading_weight, + self.cfg.actions_cost_scale, + self.cfg.energy_cost_scale, + self.cfg.dof_vel_scale, + self.cfg.death_cost, + self.cfg.alive_reward_scale, + self.rewards, + ], + ) + + def _get_dones(self) -> None: + self._compute_intermediate_values() + + wp.launch( + get_dones, + dim=self.num_envs, + inputs=[ + self._episode_length_buf_wp, + self.root_pose_w, + self.max_episode_length, + self.cfg.termination_height, + self.reset_terminated, + self.reset_time_outs, + self.reset_buf, + ], + ) + + def _reset_idx(self, mask: wp.array | None = None): + if mask is None: + mask = self._ALL_ENV_MASK + + super()._reset_idx(mask) + + wp.launch( + reset_root, + dim=self.num_envs, + inputs=[ + self.robot.data.default_root_pose, + self.robot.data.default_root_vel, + self.env_origins, + self.cfg.sim.dt, + self.targets, + self.to_targets, + self.potentials, + self.root_pose_w, + self.root_vel_w, + mask, + ], + ) + wp.launch( + reset_joints, + dim=(self.num_envs, self.robot.num_joints), + inputs=[ + self.robot.data.default_joint_pos, + self.robot.data.default_joint_vel, + self.joint_pos, + self.joint_vel, + mask, + ], + ) + + self._compute_intermediate_values() diff --git a/source/isaaclab_tasks_experimental/pyproject.toml b/source/isaaclab_tasks_experimental/pyproject.toml new file mode 100644 index 000000000000..d90ac3536f16 --- /dev/null +++ b/source/isaaclab_tasks_experimental/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel", "toml"] +build-backend = "setuptools.build_meta" diff --git a/source/isaaclab_tasks_experimental/setup.py b/source/isaaclab_tasks_experimental/setup.py new file mode 100644 index 000000000000..ae77016f8984 --- /dev/null +++ b/source/isaaclab_tasks_experimental/setup.py @@ -0,0 +1,40 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Installation script for the 'isaaclab_tasks_experimental' python package.""" + +import os + +import toml +from setuptools import find_packages, setup + +# Obtain the extension data from the extension.toml file +EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__)) +# Read the extension.toml file +EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml")) + +# Installation operation +setup( + name="isaaclab_tasks_experimental", + author="Isaac Lab Project Developers", + maintainer="Isaac Lab Project Developers", + url=EXTENSION_TOML_DATA["package"]["repository"], + version=EXTENSION_TOML_DATA["package"]["version"], + description=EXTENSION_TOML_DATA["package"]["description"], + keywords=EXTENSION_TOML_DATA["package"]["keywords"], + include_package_data=True, + python_requires=">=3.10", + install_requires=["isaaclab_tasks"], + packages=find_packages(), + classifiers=[ + "Natural Language :: English", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Isaac Sim :: 5.0.0", + "Isaac Sim :: 5.1.0", + "Isaac Sim :: 6.0.0", + ], + zip_safe=False, +) From 7eaaa3f85abe781c3fd8a26fcc7145af6ffafcf0 Mon Sep 17 00:00:00 2001 From: Antoine RICHARD Date: Fri, 13 Mar 2026 13:24:56 +0100 Subject: [PATCH 2/2] Update source/isaaclab_experimental/setup.py Signed-off-by: Antoine RICHARD --- source/isaaclab_experimental/setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/isaaclab_experimental/setup.py b/source/isaaclab_experimental/setup.py index d44ab67d789e..f999fcd026f9 100644 --- a/source/isaaclab_experimental/setup.py +++ b/source/isaaclab_experimental/setup.py @@ -35,10 +35,11 @@ install_requires=INSTALL_REQUIRES, packages=find_packages(), classifiers=[ - "Natural Language :: English", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Isaac Sim :: 5.0.0", + "Isaac Sim :: 5.1.0", + "Isaac Sim :: 6.0.0", ], zip_safe=False, )