From 0949f42b811140f4fd694ab11e5daed6486b7456 Mon Sep 17 00:00:00 2001 From: Jichuan Hu Date: Fri, 27 Feb 2026 01:50:52 -0800 Subject: [PATCH] Adds inhand manipulation warp env and WarpGraphCache utility - Add WarpGraphCache capture-or-replay utility to isaaclab_experimental/utils - Refactor direct_rl_env_warp to use WarpGraphCache, split step_warp_end into pre/post to run write_data_to_sim uncaptured between graph segments --- CONTRIBUTORS.md | 1 + .../reinforcement_learning/rsl_rl/train.py | 3 + .../config/extension.toml | 39 + .../isaaclab_experimental/__init__.py | 20 + .../isaaclab_experimental/envs/__init__.py | 51 + .../envs/direct_rl_env_warp.py | 818 +++++++++++++++ .../envs/interactive_scene_warp.py | 44 + .../envs/utils/__init__.py | 6 + .../envs/utils/spaces.py | 221 ++++ .../isaaclab_experimental/utils/timer.py | 270 +++++ .../utils/warp_graph_cache.py | 43 + source/isaaclab_experimental/pyproject.toml | 3 + source/isaaclab_experimental/setup.py | 53 + source/isaaclab_newton/setup.py | 2 +- .../isaaclab_rl/rsl_rl/vecenv_wrapper.py | 11 +- .../config/extension.toml | 22 + .../docs/README.md | 3 + .../isaaclab_tasks_experimental/__init__.py | 32 + .../direct/__init__.py | 10 + .../direct/allegro_hand/__init__.py | 29 + .../allegro_hand/allegro_hand_warp_env_cfg.py | 136 +++ .../direct/inhand_manipulation/__init__.py | 4 + .../inhand_manipulation_warp_env.py | 980 ++++++++++++++++++ .../pyproject.toml | 3 + source/isaaclab_tasks_experimental/setup.py | 40 + 25 files changed, 2842 insertions(+), 2 deletions(-) create mode 100644 source/isaaclab_experimental/config/extension.toml create mode 100644 source/isaaclab_experimental/isaaclab_experimental/__init__.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/utils/spaces.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/utils/timer.py create mode 100644 source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py create mode 100644 source/isaaclab_experimental/pyproject.toml create mode 100644 source/isaaclab_experimental/setup.py create mode 100644 source/isaaclab_tasks_experimental/config/extension.toml create mode 100644 source/isaaclab_tasks_experimental/docs/README.md create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py create mode 100644 source/isaaclab_tasks_experimental/pyproject.toml create mode 100644 source/isaaclab_tasks_experimental/setup.py diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 201c70bf727e..c67997cf40be 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -93,6 +93,7 @@ Guidelines for modifications: * Ji Yuan Feng * Jia Lin Yuan * Jiakai Zhang +* Jichuan Hu * Jinghuan Shang * Jingzhou Liu * Jinqi Wei diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py index 5d7c90b95447..50d40de0d3b6 100644 --- a/scripts/reinforcement_learning/rsl_rl/train.py +++ b/scripts/reinforcement_learning/rsl_rl/train.py @@ -6,6 +6,7 @@ """Script to train RL agent with RSL-RL.""" import argparse +import contextlib import importlib.metadata as metadata import logging import os @@ -34,6 +35,8 @@ logger = logging.getLogger(__name__) # PLACEHOLDER: Extension template (do not remove this comment) +with contextlib.suppress(ImportError): + import isaaclab_tasks_experimental # noqa: F401 RSL_RL_VERSION = "3.0.1" diff --git a/source/isaaclab_experimental/config/extension.toml b/source/isaaclab_experimental/config/extension.toml new file mode 100644 index 000000000000..d6f50f27f5f4 --- /dev/null +++ b/source/isaaclab_experimental/config/extension.toml @@ -0,0 +1,39 @@ +[package] + +# Note: Semantic Versioning is used: https://semver.org/ +version = "0.0.1" + +# Description +title = "Experimental playground for upcoming IsaacLab features" +description="Provides early access to future features that are not yet `production-ready`." +readme = "docs/README.md" +repository = "https://github.com/isaac-sim/IsaacLab" +category = "robotics" +keywords = ["kit", "robotics", "learning", "ai"] + +[python.pipapi] +requirements = [ + "numpy", + "prettytable==3.3.0", + "toml", + "hidapi", + "gymnasium==0.29.0", + "trimesh" +] + +modules = [ + "numpy", + "prettytable", + "toml", + "hid", + "gymnasium", + "trimesh" +] + +use_online_index=true + +[core] +reloadable = false + +[[python.module]] +name = "isaaclab_experimental" diff --git a/source/isaaclab_experimental/isaaclab_experimental/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/__init__.py new file mode 100644 index 000000000000..18e97e2dd188 --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Package containing the core framework.""" + +import os +import toml +from enum import IntEnum + +# Conveniences to other module directories via relative paths +ISAACLAB_EXPERIMENTAL_EXT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../")) +"""Path to the extension source directory.""" + +ISAACLAB_EXPERIMENTAL_METADATA = toml.load(os.path.join(ISAACLAB_EXPERIMENTAL_EXT_DIR, "config", "extension.toml")) +"""Extension metadata dictionary parsed from the extension.toml file.""" + +# Configure the module-level variables +__version__ = ISAACLAB_EXPERIMENTAL_METADATA["package"]["version"] diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py new file mode 100644 index 000000000000..81c59dda7d51 --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py @@ -0,0 +1,51 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Sub-package for environment definitions. + +Environments define the interface between the agent and the simulation. +In the simplest case, the environment provides the agent with the current +observations and executes the actions provided by the agent. However, the +environment can also provide additional information such as the current +reward, done flag, and information about the current episode. + +There are two types of environment designing workflows: + +* **Manager-based**: The environment is decomposed into individual components (or managers) + for different aspects (such as computing observations, applying actions, and applying + randomization. The users mainly configure the managers and the environment coordinates the + managers and calls their functions. +* **Direct**: The user implements all the necessary functionality directly into a single class + directly without the need for additional managers. + +Based on these workflows, there are the following environment classes for single and multi-agent RL: + +**Single-Agent RL:** + +* :class:`ManagerBasedEnv`: The manager-based workflow base environment which only provides the + agent with the current observations and executes the actions provided by the agent. +* :class:`ManagerBasedRLEnv`: The manager-based workflow RL task environment which besides the + functionality of the base environment also provides additional Markov Decision Process (MDP) + related information such as the current reward, done flag, and information. +* :class:`DirectRLEnv`: The direct workflow RL task environment which provides implementations for + implementing scene setup, computing dones, performing resets, and computing reward and observation. + +**Multi-Agent RL (MARL):** + +* :class:`DirectMARLEnv`: The direct workflow MARL task environment which provides implementations for + implementing scene setup, computing dones, performing resets, and computing reward and observation. + +For more information about the workflow design patterns, see the `Task Design Workflows`_ section. + +.. _`Task Design Workflows`: https://isaac-sim.github.io/IsaacLab/source/features/task_workflows.html +""" + +from .direct_rl_env_warp import DirectRLEnvWarp # noqa: F401 +from .interactive_scene_warp import InteractiveSceneWarp # noqa: F401 + +__all__ = [ + "DirectRLEnvWarp", + "InteractiveSceneWarp", +] diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py b/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py new file mode 100644 index 000000000000..ebb1acadfcb9 --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py @@ -0,0 +1,818 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from __future__ import annotations + +import contextlib +import inspect +import logging +import math +import os +import weakref +from abc import abstractmethod +from dataclasses import MISSING +from typing import Any, ClassVar + +import gymnasium as gym +import numpy as np +import torch + +# import omni.kit.app +# import omni.log +# import omni.physx +import warp as wp + +from isaaclab.envs.common import VecEnvObs, VecEnvStepReturn +from isaaclab.envs.direct_rl_env_cfg import DirectRLEnvCfg + +# from isaaclab.envs.ui import ViewportCameraController +from isaaclab.managers import EventManager +from isaaclab.sim import SimulationContext +from isaaclab.sim.utils import use_stage +from isaaclab.utils.noise import NoiseModel +from isaaclab.utils.seed import configure_seed + +from isaaclab_experimental.envs.interactive_scene_warp import InteractiveSceneWarp +from isaaclab_experimental.utils.timer import Timer +from isaaclab_experimental.utils.warp_graph_cache import WarpGraphCache + +from .utils.spaces import sample_space, spec_to_gym_space + +# from isaacsim.core.simulation_manager import SimulationManager +# from isaacsim.core.version import get_version + + +# import logger +logger = logging.getLogger(__name__) + +DEBUG_TIMER_STEP = os.environ.get("DEBUG_TIMER_STEP", "0") == "1" +"""Enable outer step() timer only. Set DEBUG_TIMER_STEP=1 env var to enable.""" + +DEBUG_TIMERS = os.environ.get("DEBUG_TIMERS", "0") == "1" +"""Enable all fine-grained inner timers (adds wp.synchronize per sub-phase). Set DEBUG_TIMERS=1 env var to enable.""" + + +@wp.kernel +def zero_mask_int32( + mask: wp.array(dtype=wp.bool), + data: wp.array(dtype=wp.int32), +): + env_index = wp.tid() + if mask[env_index]: + data[env_index] = 0 + + +@wp.kernel +def add_to_env( + data: wp.array(dtype=wp.int32), + value: wp.int32, +): + env_index = wp.tid() + data[env_index] += value + + +class DirectRLEnvWarp(gym.Env): + """The superclass for the direct workflow to design environments. + + This class implements the core functionality for reinforcement learning (RL) + environments. It is designed to be used with any RL library. The class is designed + to be used with vectorized environments, i.e., the environment is expected to be run + in parallel with multiple sub-environments. + + While the environment itself is implemented as a vectorized environment, we do not + inherit from :class:`gym.vector.VectorEnv`. This is mainly because the class adds + various methods (for wait and asynchronous updates) which are not required. + Additionally, each RL library typically has its own definition for a vectorized + environment. Thus, to reduce complexity, we directly use the :class:`gym.Env` over + here and leave it up to library-defined wrappers to take care of wrapping this + environment for their agents. + + Note: + For vectorized environments, it is recommended to **only** call the :meth:`reset` + method once before the first call to :meth:`step`, i.e. after the environment is created. + After that, the :meth:`step` function handles the reset of terminated sub-environments. + in a vectorized environment. + + """ + + is_vector_env: ClassVar[bool] = True + """Whether the environment is a vectorized environment.""" + metadata: ClassVar[dict[str, Any]] = { + "render_modes": [None, "human", "rgb_array"], + # "isaac_sim_version": get_version(), + } + """Metadata for the environment.""" + + def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs): + """Initialize the environment. + + Args: + cfg: The configuration object for the environment. + render_mode: The render mode for the environment. Defaults to None, which + is similar to ``"human"``. + + Raises: + RuntimeError: If a simulation context already exists. The environment must always create one + since it configures the simulation context and controls the simulation. + """ + # check that the config is valid + cfg.validate() + # store inputs to class + self.cfg = cfg + # store the render mode + self.render_mode = render_mode + # initialize internal variables + self._is_closed = False + + # set the seed for the environment + if self.cfg.seed is not None: + self.cfg.seed = self.seed(self.cfg.seed) + else: + logger.warning("Seed not set for the environment. The environment creation may not be deterministic.") + + # create a simulation context to control the simulator + if SimulationContext.instance() is None: + self.sim: SimulationContext = SimulationContext(self.cfg.sim) + else: + raise RuntimeError("Simulation context already exists. Cannot create a new one.") + + # make sure torch is running on the correct device + if "cuda" in self.device: + torch.cuda.set_device(self.device) + + # print useful information + print("[INFO]: Base environment:") + print(f"\tEnvironment device : {self.device}") + print(f"\tEnvironment seed : {self.cfg.seed}") + print(f"\tPhysics step-size : {self.physics_dt}") + print(f"\tRendering step-size : {self.physics_dt * self.cfg.sim.render_interval}") + print(f"\tEnvironment step-size : {self.step_dt}") + + if self.cfg.sim.render_interval < self.cfg.decimation: + msg = ( + f"The render interval ({self.cfg.sim.render_interval}) is smaller than the decimation " + f"({self.cfg.decimation}). Multiple render calls will happen for each environment step." + "If this is not intended, set the render interval to be equal to the decimation." + ) + logger.warning(msg) + + # generate scene + with Timer("[INFO]: Time taken for scene creation", "scene_creation"): + # set the stage context for scene creation steps which use the stage + with use_stage(self.sim.stage): + self.scene = InteractiveSceneWarp(self.cfg.scene) + self._setup_scene() + # attach_stage_to_usd_context() + print("[INFO]: Scene manager: ", self.scene) + + # set up camera viewport controller + # viewport is not available in other rendering modes so the function will throw a warning + # FIXME: This needs to be fixed in the future when we unify the UI functionalities even for + # non-rendering modes. + has_gui = bool(self.sim.get_setting("/isaaclab/has_gui")) + offscreen_render = bool(self.sim.get_setting("/isaaclab/render/offscreen")) + if has_gui or offscreen_render: + # self.viewport_camera_controller = ViewportCameraController(self, self.cfg.viewer) + self.viewport_camera_controller = None + else: + self.viewport_camera_controller = None + + # create event manager + # note: this is needed here (rather than after simulation play) to allow USD-related randomization events + # that must happen before the simulation starts. Example: randomizing mesh scale + if self.cfg.events: + self.event_manager = EventManager(self.cfg.events, self) + + # apply USD-related randomization events + if "prestartup" in self.event_manager.available_modes: + self.event_manager.apply(mode="prestartup") + + # play the simulator to activate physics handles + # note: this activates the physics simulation view that exposes TensorAPIs + # note: when started in extension mode, first call sim.reset_async() and then initialize the managers + # if builtins.ISAAC_LAUNCHED_FROM_TERMINAL is False: + # print("[INFO]: Starting the simulation. This may take a few seconds. Please wait...") + with Timer("[INFO]: Time taken for simulation start", "simulation_start"): + # since the reset can trigger callbacks which use the stage, + # we need to set the stage context here + with use_stage(self.sim.stage): + self.sim.reset() + # update scene to pre populate data buffers for assets and sensors. + # this is needed for the observation manager to get valid tensors for initialization. + # this shouldn't cause an issue since later on, users do a reset over all the + # environments so the lazy buffers would be reset. + self.scene.update(dt=self.physics_dt) + + # check if debug visualization is has been implemented by the environment + source_code = inspect.getsource(self._set_debug_vis_impl) + self.has_debug_vis_implementation = "NotImplementedError" not in source_code + self._debug_vis_handle = None + + # extend UI elements + # we need to do this here after all the managers are initialized + # this is because they dictate the sensors and commands right now + if bool(self.sim.settings.get("/isaaclab/visualizer")) and self.cfg.ui_window_class_type is not None: + self._window = self.cfg.ui_window_class_type(self, window_name="IsaacLab") + else: + # if no window, then we don't need to store the window + self._window = None + + # allocate dictionary to store metrics + self.extras = {} + + # initialize data and constants + # -- counter for simulation steps + self._sim_step_counter = 0 + # -- counter for curriculum + self.common_step_counter = 0 + # -- init buffers + self._episode_length_buf_wp = wp.zeros(self.num_envs, dtype=wp.int32, device=self.device) + self.episode_length_buf = wp.to_torch(self._episode_length_buf_wp) + self.reset_terminated = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device) + self.reset_time_outs = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device) + self.reset_buf = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device) + self._ALL_ENV_MASK = wp.ones(self.num_envs, dtype=wp.bool, device=self.device) + + # Expected bindings: + self.torch_obs_buf: torch.Tensor = None + self.torch_reward_buf: torch.Tensor = None + self.torch_reset_terminated: torch.Tensor = None + self.torch_reset_time_outs: torch.Tensor = None + self.torch_episode_length_buf: torch.Tensor = None + + # Warp CUDA graph cache for capture-or-replay + self._graph_cache = WarpGraphCache() + + # setup the action and observation spaces for Gym + self._configure_gym_env_spaces() + + # setup noise cfg for adding action and observation noise + if self.cfg.action_noise_model: + self._action_noise_model: NoiseModel = self.cfg.action_noise_model.class_type( + self.cfg.action_noise_model, num_envs=self.num_envs, device=self.device + ) + if self.cfg.observation_noise_model: + self._observation_noise_model: NoiseModel = self.cfg.observation_noise_model.class_type( + self.cfg.observation_noise_model, num_envs=self.num_envs, device=self.device + ) + + # perform events at the start of the simulation + if self.cfg.events: + # we print it here to make the logging consistent + print("[INFO] Event Manager: ", self.event_manager) + + if "startup" in self.event_manager.available_modes: + self.event_manager.apply(mode="startup") + + # set the framerate of the gym video recorder wrapper so that the playback speed of the produced + # video matches the simulation + self.metadata["render_fps"] = 1 / self.step_dt + + # print the environment information + print("[INFO]: Completed setting up the environment...") + + def __del__(self): + """Cleanup for the environment.""" + # Suppress errors during Python shutdown to avoid noisy tracebacks + # Note: contextlib may be None during interpreter shutdown + if contextlib is not None: + with contextlib.suppress(ImportError, AttributeError, TypeError): + self.close() + + """ + Properties. + """ + + @property + def num_envs(self) -> int: + """The number of instances of the environment that are running.""" + return self.scene.num_envs + + @property + def physics_dt(self) -> float: + """The physics time-step (in s). + + This is the lowest time-decimation at which the simulation is happening. + """ + return self.cfg.sim.dt + + @property + def step_dt(self) -> float: + """The environment stepping time-step (in s). + + This is the time-step at which the environment steps forward. + """ + return self.cfg.sim.dt * self.cfg.decimation + + @property + def device(self): + """The device on which the environment is running.""" + return self.sim.device + + @property + def max_episode_length_s(self) -> float: + """Maximum episode length in seconds.""" + return self.cfg.episode_length_s + + @property + def max_episode_length(self): + """The maximum episode length in steps adjusted from s.""" + return math.ceil(self.max_episode_length_s / (self.cfg.sim.dt * self.cfg.decimation)) + + """ + Operations. + """ + + def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[VecEnvObs, dict]: + """Resets all the environments and returns observations. + + This function calls the :meth:`_reset_idx` function to reset all the environments. + However, certain operations, such as procedural terrain generation, that happened during initialization + are not repeated. + + Args: + seed: The seed to use for randomization. Defaults to None, in which case the seed is not set. + options: Additional information to specify how the environment is reset. Defaults to None. + + Note: + This argument is used for compatibility with Gymnasium environment definition. + + Returns: + A tuple containing the observations and extras. + """ + # set the seed + if seed is not None: + self.seed(seed) + + # reset state of scene + self._reset_idx(self._ALL_ENV_MASK) + + # update articulation kinematics + self.scene.write_data_to_sim() + + # if sensors are added to the scene, make sure we render to reflect changes in reset + if hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset: + self.sim.render() + + # if self.cfg.wait_for_textures and self.sim.has_rtx_sensors(): + # while SimulationManager.assets_loading(): + # self.sim.render() + + # return observations + self._get_observations() + return {"policy": self.torch_obs_buf.clone()}, self.extras + + @Timer(name="env_step", msg="Step took:", enable=DEBUG_TIMER_STEP or DEBUG_TIMERS) + def step(self, action: torch.Tensor) -> VecEnvStepReturn: + """Execute one time-step of the environment's dynamics. + + The environment steps forward at a fixed time-step, while the physics simulation is decimated at a + lower time-step. This is to ensure that the simulation is stable. These two time-steps can be configured + independently using the :attr:`DirectRLEnvCfg.decimation` (number of simulation steps per environment step) + and the :attr:`DirectRLEnvCfg.sim.physics_dt` (physics time-step). Based on these parameters, the environment + time-step is computed as the product of the two. + + This function performs the following steps: + + 1. Pre-process the actions before stepping through the physics. + 2. Apply the actions to the simulator and step through the physics in a decimated manner. + 3. Compute the reward and done signals. + 4. Reset environments that have terminated or reached the maximum episode length. + 5. Apply interval events if they are enabled. + 6. Compute observations. + + Args: + action: The actions to apply on the environment. Shape is (num_envs, action_dim). + + Returns: + A tuple containing the observations, rewards, resets (terminated and truncated) and extras. + """ + + action = action.to(self.device) + # add action noise + if self.cfg.action_noise_model: + action = self._action_noise_model(action) + + # process actions, #TODO pass the torch tensor directly. + with Timer(name="pre_physics", msg="Pre-physics step took:", enable=DEBUG_TIMERS): + self._pre_physics_step( + wp.from_torch(action) + ) # Creates a tensor and discards it. Not graphable unless training loop reuses the same pointer. + + # check if we need to do rendering within the physics loop + # note: checked here once to avoid multiple checks within the loop + _has_rtx = hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors() + is_rendering = bool(self.sim.settings.get("/isaaclab/visualizer")) or _has_rtx + + # perform physics stepping + with Timer(name="physics_loop", msg="Physics loop took:", enable=DEBUG_TIMERS): + for _ in range(self.cfg.decimation): + self._sim_step_counter += 1 + # set actions into buffers + # simulate + with Timer(name="apply_action", msg="Action processing step took:", enable=DEBUG_TIMERS): + self._graph_cache.capture_or_replay("action", self.step_warp_action) + + # write_data_to_sim runs outside the CUDA graph because _apply_actuator_model + # uses torch ops (wp.to_torch + torch arithmetic) that cross CUDA streams. + with Timer(name="write_data_to_sim_loop", msg="Write data to sim (loop) took:", enable=DEBUG_TIMERS): + self.scene.write_data_to_sim() + + with Timer(name="simulate", msg="Newton simulation step took:", enable=DEBUG_TIMERS): + self.sim.step(render=False) + # render between steps only if the GUI or an RTX sensor needs it + # note: we assume the render interval to be the shortest accepted rendering interval. + # If a camera needs rendering at a faster frequency, this will lead to unexpected behavior. + if self._sim_step_counter % self.cfg.sim.render_interval == 0 and is_rendering: + self.sim.render() + # update buffers at sim dt + with Timer(name="scene_update", msg="Scene update took:", enable=DEBUG_TIMERS): + self.scene.update(dt=self.physics_dt) + + self.common_step_counter += 1 # total step (common for all envs) + with Timer(name="end_pre_graph", msg="End pre-graph took:", enable=DEBUG_TIMERS): + self._graph_cache.capture_or_replay("end_pre", self._step_warp_end_pre) + # write_data_to_sim runs uncaptured — it uses torch ops that cross CUDA streams. + with Timer(name="write_data_to_sim_post", msg="Write data to sim (post-reset) took:", enable=DEBUG_TIMERS): + self.scene.write_data_to_sim() + with Timer(name="end_post_graph", msg="End post-graph took:", enable=DEBUG_TIMERS): + self._graph_cache.capture_or_replay("end_post", self._step_warp_end_post) + + # Visualization hook — runs after CUDA graph scope. Override in subclass + # to update markers or other non-graphable visual elements. + with Timer(name="visualize", msg="Visualize took:", enable=DEBUG_TIMERS): + self._post_step_visualize() + + # return observations, rewards, resets and extras + return ( + {"policy": self.torch_obs_buf.clone()}, + self.torch_reward_buf, + self.torch_reset_terminated, + self.torch_reset_time_outs, + self.extras, + ) + + def _post_step_visualize(self) -> None: + """Hook for updating visualization markers after CUDA graph scope. + + Override in subclass to update markers or other non-graphable visual + elements (e.g., those requiring wp.to_torch + .cpu().numpy()). + This runs every step, outside any CUDA graph capture. + """ + pass + + def step_warp_action(self) -> None: + self._apply_action() + # Note: scene.write_data_to_sim() is called separately outside the CUDA graph + # capture scope because it invokes _apply_actuator_model() which uses torch + # arithmetic (wp.to_torch + torch ops). This would cause a CUDA stream crossing + # error during graph capture. Moving it outside is safe since it runs every step. + + def _step_warp_end_pre(self) -> None: + """Capturable portion before write_data_to_sim (pure warp kernels).""" + wp.launch( + add_to_env, + dim=self.num_envs, + inputs=[ + self._episode_length_buf_wp, + 1, + ], + ) + self._get_dones() + self._get_rewards() + + # -- reset envs that terminated/timed-out and log the episode information + self._reset_idx(mask=self.reset_buf) + + def _step_warp_end_post(self) -> None: + """Capturable portion after write_data_to_sim (pure warp kernels).""" + # if sensors are added to the scene, make sure we render to reflect changes in reset + # if self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset: + # self.sim.render() + + # TODO We could split it out. + # post-step: step interval event + # if self.cfg.events: + # if "interval" in self.event_manager.available_modes: + # self.event_manager.apply(mode="interval", dt=self.step_dt) + + # update observations + self._get_observations() + + # add observation noise + # note: we apply no noise to the state space (since it is used for critic networks) + # if self.cfg.observation_noise_model: + # self.obs_buf["policy"] = self._observation_noise_model(self.obs_buf["policy"]) + + @staticmethod + def seed(seed: int = -1) -> int: + """Set the seed for the environment. + + Args: + seed: The seed for random generator. Defaults to -1. + + Returns: + The seed used for random generator. + """ + # set seed for replicator + try: + import omni.replicator.core as rep + + rep.set_global_seed(seed) + except ModuleNotFoundError: + pass + # set seed for torch and other libraries + return configure_seed(seed) + + def render(self, recompute: bool = False) -> np.ndarray | None: + """Run rendering without stepping through the physics. + + By convention, if mode is: + + - **human**: Render to the current display and return nothing. Usually for human consumption. + - **rgb_array**: Return an numpy.ndarray with shape (x, y, 3), representing RGB values for an + x-by-y pixel image, suitable for turning into a video. + + Args: + recompute: Whether to force a render even if the simulator has already rendered the scene. + Defaults to False. + + Returns: + The rendered image as a numpy array if mode is "rgb_array". Otherwise, returns None. + + Raises: + RuntimeError: If mode is set to "rgb_data" and simulation render mode does not support it. + In this case, the simulation render mode must be set to ``RenderMode.PARTIAL_RENDERING`` + or ``RenderMode.FULL_RENDERING``. + NotImplementedError: If an unsupported rendering mode is specified. + """ + # run a rendering step of the simulator + # if we have rtx sensors, we do not need to render again sim + if not (hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors()) and not recompute: + self.sim.render() + # decide the rendering mode + if self.render_mode == "human" or self.render_mode is None: + return None + elif self.render_mode == "rgb_array": + # check that if any render could have happened + has_gui = bool(self.sim.get_setting("/isaaclab/has_gui")) + offscreen_render = bool(self.sim.get_setting("/isaaclab/render/offscreen")) + # Rendering is possible if we have GUI or offscreen rendering enabled + can_render = has_gui or offscreen_render + + if not can_render: + render_mode_name = "NO_GUI_OR_RENDERING" + raise RuntimeError( + f"Cannot render '{self.render_mode}' when the simulation render mode is" + f" '{render_mode_name}'. Please set the simulation render mode" + " to:'PARTIAL_RENDERING' or" + " 'FULL_RENDERING'. If running headless, make" + " sure --enable_cameras is set." + ) + # create the annotator if it does not exist + if not hasattr(self, "_rgb_annotator"): + import omni.replicator.core as rep + + # create render product + self._render_product = rep.create.render_product( + self.cfg.viewer.cam_prim_path, self.cfg.viewer.resolution + ) + # create rgb annotator -- used to read data from the render product + self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") + self._rgb_annotator.attach([self._render_product]) + # obtain the rgb data + rgb_data = self._rgb_annotator.get_data() + # convert to numpy array + rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) + # return the rgb data + # note: initially the renerer is warming up and returns empty data + if rgb_data.size == 0: + return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8) + else: + return rgb_data[:, :, :3] + else: + raise NotImplementedError( + f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}." + ) + + def close(self): + """Cleanup for the environment.""" + if not self._is_closed: + # close entities related to the environment + # note: this is order-sensitive to avoid any dangling references + if self.cfg.events: + del self.event_manager + del self.scene + if self.viewport_camera_controller is not None: + del self.viewport_camera_controller + + # # clear callbacks and instance + # if float(".".join(get_version()[2])) >= 5: + # if self.cfg.sim.create_stage_in_memory: + # # detach physx stage + # omni.physx.get_physx_simulation_interface().detach_stage() + # self.sim.stop() + # self.sim.clear() + + # self.sim.clear_all_callbacks() + self.sim.clear_instance() + + # destroy the window + if self._window is not None: + self._window = None + # update closing status + self._is_closed = True + + """ + Operations - Debug Visualization. + """ + + def set_debug_vis(self, debug_vis: bool) -> bool: + """Toggles the environment debug visualization. + + Args: + debug_vis: Whether to visualize the environment debug visualization. + + Returns: + Whether the debug visualization was successfully set. False if the environment + does not support debug visualization. + """ + # check if debug visualization is supported + if not self.has_debug_vis_implementation: + return False + # toggle debug visualization objects + self._set_debug_vis_impl(debug_vis) + # toggle debug visualization handles + if debug_vis: + import omni.kit.app + + # create a subscriber for the post update event if it doesn't exist + if self._debug_vis_handle is None: + app_interface = omni.kit.app.get_app_interface() + self._debug_vis_handle = app_interface.get_post_update_event_stream().create_subscription_to_pop( + lambda event, obj=weakref.proxy(self): obj._debug_vis_callback(event) + ) + else: + # remove the subscriber if it exists + if self._debug_vis_handle is not None: + self._debug_vis_handle.unsubscribe() + self._debug_vis_handle = None + # return success + return True + + """ + Helper functions. + """ + + def _configure_gym_env_spaces(self): + """Configure the action and observation spaces for the Gym environment.""" + # show deprecation message and overwrite configuration + if self.cfg.num_actions is not None: + logger.warning("DirectRLEnvCfg.num_actions is deprecated. Use DirectRLEnvCfg.action_space instead.") + if isinstance(self.cfg.action_space, type(MISSING)): + self.cfg.action_space = self.cfg.num_actions + if self.cfg.num_observations is not None: + logger.warning( + "DirectRLEnvCfg.num_observations is deprecated. Use DirectRLEnvCfg.observation_space instead." + ) + if isinstance(self.cfg.observation_space, type(MISSING)): + self.cfg.observation_space = self.cfg.num_observations + if self.cfg.num_states is not None: + logger.warning("DirectRLEnvCfg.num_states is deprecated. Use DirectRLEnvCfg.state_space instead.") + if isinstance(self.cfg.state_space, type(MISSING)): + self.cfg.state_space = self.cfg.num_states + + # set up spaces + self.single_observation_space = gym.spaces.Dict() + self.single_observation_space["policy"] = spec_to_gym_space(self.cfg.observation_space) + self.single_action_space = spec_to_gym_space(self.cfg.action_space) + + # batch the spaces for vectorized environments + self.observation_space = gym.vector.utils.batch_space(self.single_observation_space["policy"], self.num_envs) + self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs) + + # optional state space for asymmetric actor-critic architectures + self.state_space = None + if self.cfg.state_space: + self.single_observation_space["critic"] = spec_to_gym_space(self.cfg.state_space) + self.state_space = gym.vector.utils.batch_space(self.single_observation_space["critic"], self.num_envs) + + # instantiate actions (needed for tasks for which the observations computation is dependent on the actions) + self.actions = sample_space(self.single_action_space, self.sim.device, batch_size=self.num_envs, fill_value=0) + + def _reset_idx(self, mask: wp.array | None = None): + """Reset environments based on a boolean mask. + + Args: + mask: Boolean mask indicating which environments to reset. + Shape is (num_envs,). If None, all environments are reset. + """ + if mask is None: + mask = self._ALL_ENV_MASK + self.scene.reset(env_ids=None, env_mask=mask) + + # apply events such as randomization for environments that need a reset + # if self.cfg.events: + # if "reset" in self.event_manager.available_modes: + # env_step_count = self._sim_step_counter // self.cfg.decimation + # self.event_manager.apply(mode="reset", env_ids=env_ids, global_env_step_count=env_step_count) + + # reset noise models + # if self.cfg.action_noise_model: + # self._action_noise_model.reset(env_ids) + # if self.cfg.observation_noise_model: + # self._observation_noise_model.reset(env_ids) + + # reset the episode length buffer + wp.launch( + zero_mask_int32, + dim=self.num_envs, + inputs=[ + mask, + self._episode_length_buf_wp, + ], + ) + + """ + Implementation-specific functions. + """ + + def _setup_scene(self): + """Setup the scene for the environment. + + This function is responsible for creating the scene objects and setting up the scene for the environment. + The scene creation can happen through :class:`isaaclab.scene.InteractiveSceneCfg` or through + directly creating the scene objects and registering them with the scene manager. + + We leave the implementation of this function to the derived classes. If the environment does not require + any explicit scene setup, the function can be left empty. + """ + pass + + @abstractmethod + def _pre_physics_step(self, actions: wp.array) -> None: + """Pre-process actions before stepping through the physics. + + This function is responsible for pre-processing the actions before stepping through the physics. + It is called before the physics stepping (which is decimated). + + Args: + actions: The actions to apply on the environment. Shape is (num_envs, action_dim). + """ + raise NotImplementedError(f"Please implement the '_pre_physics_step' method for {self.__class__.__name__}.") + + @abstractmethod + def _apply_action(self) -> None: + """Apply actions to the simulator. + + This function is responsible for applying the actions to the simulator. It is called at each + physics time-step. Must be pure warp (no torch ops) to be CUDA graph capturable. + """ + raise NotImplementedError(f"Please implement the '_apply_action' method for {self.__class__.__name__}.") + + @abstractmethod + def _get_observations(self) -> None: + """Compute the observations for the environment. + + Writes results into the observation buffers (e.g., ``self.obs_buf``). + """ + raise NotImplementedError(f"Please implement the '_get_observations' method for {self.__class__.__name__}.") + + def _get_states(self) -> VecEnvObs | None: + """Compute and return the states for the environment. + + The state-space is used for asymmetric actor-critic architectures. It is configured + using the :attr:`DirectRLEnvCfg.state_space` parameter. + + Returns: + The states for the environment. If the environment does not have a state-space, the function + returns a None. + """ + return None # noqa: R501 + + @abstractmethod + def _get_rewards(self) -> None: + """Compute the rewards for the environment. + + Writes results into the reward buffer (e.g., ``self.reward_buf``). + """ + raise NotImplementedError(f"Please implement the '_get_rewards' method for {self.__class__.__name__}.") + + @abstractmethod + def _get_dones(self) -> None: + """Compute the done flags for the environment. + + Writes results into the done buffers (e.g., ``self.reset_terminated``, ``self.reset_time_outs``). + """ + raise NotImplementedError(f"Please implement the '_get_dones' method for {self.__class__.__name__}.") + + def _set_debug_vis_impl(self, debug_vis: bool): + """Set debug visualization into visualization objects. + + This function is responsible for creating the visualization objects if they don't exist + and input ``debug_vis`` is True. If the visualization objects exist, the function should + set their visibility into the stage. + """ + raise NotImplementedError(f"Debug visualization is not implemented for {self.__class__.__name__}.") diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py b/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py new file mode 100644 index 000000000000..f792b994f67b --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py @@ -0,0 +1,44 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp-native interactive scene with env_mask support for reset.""" + +from __future__ import annotations + +from collections.abc import Sequence + +import warp as wp + +from isaaclab.scene import InteractiveScene + + +class InteractiveSceneWarp(InteractiveScene): + """Interactive scene with warp-native env_mask support for reset. + + Extends :class:`InteractiveScene` to accept a boolean warp mask for selective resets, + avoiding the need to convert between env_ids and masks. + """ + + def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None = None): + """Reset scene entities using either env_ids or a boolean env_mask. + + Args: + env_ids: The indices of the environments to reset. Defaults to None (all instances). + env_mask: Boolean warp mask of shape (num_envs,). Defaults to None. + """ + # -- assets (support env_mask) + for articulation in self._articulations.values(): + articulation.reset(env_ids, env_mask=env_mask) + for deformable_object in self._deformable_objects.values(): + deformable_object.reset(env_ids) + for rigid_object in self._rigid_objects.values(): + rigid_object.reset(env_ids, env_mask=env_mask) + for surface_gripper in self._surface_grippers.values(): + surface_gripper.reset(env_ids) + for rigid_object_collection in self._rigid_object_collections.values(): + rigid_object_collection.reset(env_ids, env_mask=env_mask) + # -- sensors (no env_mask support) + for sensor in self._sensors.values(): + sensor.reset(env_ids) diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py new file mode 100644 index 000000000000..d28381b15b76 --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Sub-package for environment utils.""" diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/utils/spaces.py b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/spaces.py new file mode 100644 index 000000000000..171e47e9debe --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/spaces.py @@ -0,0 +1,221 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +import gymnasium as gym +import json +import numpy as np +import torch +from typing import Any + +from isaaclab.envs.common import SpaceType + + +def spec_to_gym_space(spec: SpaceType) -> gym.spaces.Space: + """Generate an appropriate Gymnasium space according to the given space specification. + + Args: + spec: Space specification. + + Returns: + Gymnasium space. + + Raises: + ValueError: If the given space specification is not valid/supported. + """ + if isinstance(spec, gym.spaces.Space): + return spec + # fundamental spaces + # Box + elif isinstance(spec, int): + return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(spec,)) + elif isinstance(spec, list) and all(isinstance(x, int) for x in spec): + return gym.spaces.Box(low=-np.inf, high=np.inf, shape=spec) + # Discrete + elif isinstance(spec, set) and len(spec) == 1: + return gym.spaces.Discrete(n=next(iter(spec))) + # MultiDiscrete + elif isinstance(spec, list) and all(isinstance(x, set) and len(x) == 1 for x in spec): + return gym.spaces.MultiDiscrete(nvec=[next(iter(x)) for x in spec]) + # composite spaces + # Tuple + elif isinstance(spec, tuple): + return gym.spaces.Tuple([spec_to_gym_space(x) for x in spec]) + # Dict + elif isinstance(spec, dict): + return gym.spaces.Dict({k: spec_to_gym_space(v) for k, v in spec.items()}) + raise ValueError(f"Unsupported space specification: {spec}") + + +def sample_space(space: gym.spaces.Space, device: str, batch_size: int = -1, fill_value: float | None = None) -> Any: + """Sample a Gymnasium space where the data container are PyTorch tensors. + + Args: + space: Gymnasium space. + device: The device where the tensor should be created. + batch_size: Batch size. If the specified value is greater than zero, a batched space will be created and sampled from it. + fill_value: The value to fill the created tensors with. If None (default value), tensors will keep their random values. + + Returns: + Tensorized sampled space. + """ + + def tensorize(s, x): + if isinstance(s, gym.spaces.Box): + tensor = torch.tensor(x, device=device, dtype=torch.float32).reshape(batch_size, *s.shape) + if fill_value is not None: + tensor.fill_(fill_value) + return tensor + elif isinstance(s, gym.spaces.Discrete): + if isinstance(x, np.ndarray): + tensor = torch.tensor(x, device=device, dtype=torch.int64).reshape(batch_size, 1) + if fill_value is not None: + tensor.fill_(int(fill_value)) + return tensor + elif isinstance(x, np.number) or type(x) in [int, float]: + tensor = torch.tensor([x], device=device, dtype=torch.int64).reshape(batch_size, 1) + if fill_value is not None: + tensor.fill_(int(fill_value)) + return tensor + elif isinstance(s, gym.spaces.MultiDiscrete): + if isinstance(x, np.ndarray): + tensor = torch.tensor(x, device=device, dtype=torch.int64).reshape(batch_size, *s.shape) + if fill_value is not None: + tensor.fill_(int(fill_value)) + return tensor + elif isinstance(s, gym.spaces.Dict): + return {k: tensorize(_s, x[k]) for k, _s in s.items()} + elif isinstance(s, gym.spaces.Tuple): + return tuple([tensorize(_s, v) for _s, v in zip(s, x)]) + + sample = (gym.vector.utils.batch_space(space, batch_size) if batch_size > 0 else space).sample() + return tensorize(space, sample) + + +def serialize_space(space: SpaceType) -> str: + """Serialize a space specification as JSON. + + Args: + space: Space specification. + + Returns: + Serialized JSON representation. + """ + # Gymnasium spaces + if isinstance(space, gym.spaces.Discrete): + return json.dumps({"type": "gymnasium", "space": "Discrete", "n": int(space.n)}) + elif isinstance(space, gym.spaces.Box): + return json.dumps({ + "type": "gymnasium", + "space": "Box", + "low": space.low.tolist(), + "high": space.high.tolist(), + "shape": space.shape, + }) + elif isinstance(space, gym.spaces.MultiDiscrete): + return json.dumps({"type": "gymnasium", "space": "MultiDiscrete", "nvec": space.nvec.tolist()}) + elif isinstance(space, gym.spaces.Tuple): + return json.dumps({"type": "gymnasium", "space": "Tuple", "spaces": tuple(map(serialize_space, space.spaces))}) + elif isinstance(space, gym.spaces.Dict): + return json.dumps( + {"type": "gymnasium", "space": "Dict", "spaces": {k: serialize_space(v) for k, v in space.spaces.items()}} + ) + # Python data types + # Box + elif isinstance(space, int) or (isinstance(space, list) and all(isinstance(x, int) for x in space)): + return json.dumps({"type": "python", "space": "Box", "value": space}) + # Discrete + elif isinstance(space, set) and len(space) == 1: + return json.dumps({"type": "python", "space": "Discrete", "value": next(iter(space))}) + # MultiDiscrete + elif isinstance(space, list) and all(isinstance(x, set) and len(x) == 1 for x in space): + return json.dumps({"type": "python", "space": "MultiDiscrete", "value": [next(iter(x)) for x in space]}) + # composite spaces + # Tuple + elif isinstance(space, tuple): + return json.dumps({"type": "python", "space": "Tuple", "value": [serialize_space(x) for x in space]}) + # Dict + elif isinstance(space, dict): + return json.dumps( + {"type": "python", "space": "Dict", "value": {k: serialize_space(v) for k, v in space.items()}} + ) + raise ValueError(f"Unsupported space ({space})") + + +def deserialize_space(string: str) -> gym.spaces.Space: + """Deserialize a space specification encoded as JSON. + + Args: + string: Serialized JSON representation. + + Returns: + Space specification. + """ + obj = json.loads(string) + # Gymnasium spaces + if obj["type"] == "gymnasium": + if obj["space"] == "Discrete": + return gym.spaces.Discrete(n=obj["n"]) + elif obj["space"] == "Box": + return gym.spaces.Box(low=np.array(obj["low"]), high=np.array(obj["high"]), shape=obj["shape"]) + elif obj["space"] == "MultiDiscrete": + return gym.spaces.MultiDiscrete(nvec=np.array(obj["nvec"])) + elif obj["space"] == "Tuple": + return gym.spaces.Tuple(spaces=tuple(map(deserialize_space, obj["spaces"]))) + elif obj["space"] == "Dict": + return gym.spaces.Dict(spaces={k: deserialize_space(v) for k, v in obj["spaces"].items()}) + else: + raise ValueError(f"Unsupported space ({obj['spaces']})") + # Python data types + elif obj["type"] == "python": + if obj["space"] == "Discrete": + return {obj["value"]} + elif obj["space"] == "Box": + return obj["value"] + elif obj["space"] == "MultiDiscrete": + return [{x} for x in obj["value"]] + elif obj["space"] == "Tuple": + return tuple(map(deserialize_space, obj["value"])) + elif obj["space"] == "Dict": + return {k: deserialize_space(v) for k, v in obj["value"].items()} + else: + raise ValueError(f"Unsupported space ({obj['spaces']})") + else: + raise ValueError(f"Unsupported type ({obj['type']})") + + +def replace_env_cfg_spaces_with_strings(env_cfg: object) -> object: + """Replace spaces objects with their serialized JSON representations in an environment config. + + Args: + env_cfg: Environment config instance. + + Returns: + Environment config instance with spaces replaced if any. + """ + for attr in ["observation_space", "action_space", "state_space"]: + if hasattr(env_cfg, attr): + setattr(env_cfg, attr, serialize_space(getattr(env_cfg, attr))) + for attr in ["observation_spaces", "action_spaces"]: + if hasattr(env_cfg, attr): + setattr(env_cfg, attr, {k: serialize_space(v) for k, v in getattr(env_cfg, attr).items()}) + return env_cfg + + +def replace_strings_with_env_cfg_spaces(env_cfg: object) -> object: + """Replace spaces objects with their serialized JSON representations in an environment config. + + Args: + env_cfg: Environment config instance. + + Returns: + Environment config instance with spaces replaced if any. + """ + for attr in ["observation_space", "action_space", "state_space"]: + if hasattr(env_cfg, attr): + setattr(env_cfg, attr, deserialize_space(getattr(env_cfg, attr))) + for attr in ["observation_spaces", "action_spaces"]: + if hasattr(env_cfg, attr): + setattr(env_cfg, attr, {k: deserialize_space(v) for k, v in getattr(env_cfg, attr).items()}) + return env_cfg diff --git a/source/isaaclab_experimental/isaaclab_experimental/utils/timer.py b/source/isaaclab_experimental/isaaclab_experimental/utils/timer.py new file mode 100644 index 000000000000..039558a49347 --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/utils/timer.py @@ -0,0 +1,270 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Sub-module for a timer class that can be used for performance measurements.""" + +from __future__ import annotations + +import math +import time +from contextlib import ContextDecorator +from typing import Any, ClassVar, Literal + +import warp as wp + + +class TimerError(Exception): + """A custom exception used to report errors in use of :class:`Timer` class.""" + + pass + + +class Timer(ContextDecorator): + """A timer for performance measurements. + + A class to keep track of time for performance measurement. + It allows timing via context managers and decorators as well. + + It uses the `time.perf_counter` function to measure time. This function + returns the number of seconds since the epoch as a float. It has the + highest resolution available on the system. + + As a regular object: + + .. code-block:: python + + import time + + from isaaclab_experimental.utils.timer import Timer + + timer = Timer() + timer.start() + time.sleep(1) + print(1 <= timer.time_elapsed <= 2) # Output: True + + time.sleep(1) + timer.stop() + print(2 <= stopwatch.total_run_time) # Output: True + + As a context manager: + + .. code-block:: python + + import time + + from isaaclab_experimental.utils.timer import Timer + + with Timer() as timer: + time.sleep(1) + print(1 <= timer.time_elapsed <= 2) # Output: True + + Reference: https://gist.github.com/sumeet/1123871 + """ + + timing_info: ClassVar[dict[str, dict[str, float]]] = dict() + """Dictionary for storing the elapsed time per timer instances globally. + + This dictionary logs the timer information. The keys are the names given to the timer class + at its initialization. If no :attr:`name` is passed to the constructor, no time + is recorded in the dictionary. + """ + + enable = True + """Whether to enable the timer.""" + + enable_display_output = True + """Whether to enable the display output.""" + + def __init__( + self, + msg: str | None = None, + name: str | None = None, + enable: bool = True, + format: Literal["s", "ms", "us", "ns"] = "s", + ): + """Initializes the timer. + + Args: + msg: The message to display when using the timer + class in a context manager. Defaults to None. + name: The name to use for logging times in a global + dictionary. Defaults to None. + enable: Whether to enable the timer. Defaults to True. + format: The format to use for the elapsed time. Defaults to "s". + """ + self._msg = msg + self._name = name + self._start_time = None + self._stop_time = None + self._elapsed_time = None + self._enable = enable if Timer.enable else False + self._format = format + + # Check if the format is valid + assert format in ["s", "ms", "us", "ns"], f"Invalid format, {format} is not in [s, ms, us, ns]" + # Convert the format to a multiplier + self._multiplier = { + "s": 1.0, + "ms": 1000.0, + "us": 1000000.0, + "ns": 1000000000.0, + }[format] + + # Online welford's algorithm to compute the mean and std of the elapsed time + self._mean = 0.0 + self._m2 = 0.0 + self._std = 0.0 + self._n = 0 + + def __str__(self) -> str: + """A string representation of the class object. + + Returns: + A string containing the elapsed time. + """ + return f"{(self.time_elapsed * self._multiplier):0.6f} {self._format}" + + """ + Properties + """ + + @property + def time_elapsed(self) -> float: + """The number of seconds that have elapsed since this timer started timing. + + Note: + This is used for checking how much time has elapsed while the timer is still running. + """ + return time.perf_counter() - self._start_time + + @property + def total_run_time(self) -> float: + """The number of seconds that elapsed from when the timer started to when it ended.""" + return self._elapsed_time + + """ + Operations + """ + + def start(self): + """Start timing.""" + if not self._enable: + return + + if self._start_time is not None: + raise TimerError("Timer is running. Use .stop() to stop it") + + self._start_time = time.perf_counter() + + def stop(self): + """Stop timing.""" + if not self._enable: + return + + if self._start_time is None: + raise TimerError("Timer is not running. Use .start() to start it") + + # Synchronize the device to make sure we time the whole operation + wp.synchronize_device() + + # Get the elapsed time + self._stop_time = time.perf_counter() + self._elapsed_time = self._stop_time - self._start_time + self._start_time = None + + if (self._name is not None) and (self._enable): + # Update the welford's algorithm + self.update_welford(self._elapsed_time) + + # Update the timing info + Timer.timing_info[self._name] = { + "last": self._elapsed_time, + "m2": self._m2, + "mean": self._mean, + "std": self._std, + "n": self._n, + } + + """ + Online welford's algorithm to compute the mean and std of the elapsed time + """ + + def update_welford(self, value: float): + """Update the welford's algorithm with a new value.""" + + try: + self._n = Timer.timing_info[self._name]["n"] + 1 + delta = value - Timer.timing_info[self._name]["mean"] + self._mean = Timer.timing_info[self._name]["mean"] + delta / self._n + delta2 = value - self._mean + self._m2 = Timer.timing_info[self._name]["m2"] + delta * delta2 + except KeyError: + self._n = 1 + self._mean = value + self._m2 = 0.0 + + # Update the std + self._std = math.sqrt(self._m2 / self._n) + + """ + Context managers + """ + + def __enter__(self) -> Timer: + """Start timing and return this `Timer` instance.""" + self.start() + return self + + def __exit__(self, *exc_info: Any): + """Stop timing.""" + self.stop() + # print message + if self._enable: + if (self._msg is not None) and (Timer.enable_display_output): + print( + self._msg, + f"Last: {(self._elapsed_time * self._multiplier):0.6f} {self._format}, " + f"Mean: {(self._mean * self._multiplier):0.6f} {self._format}, " + f"Std: {(self._std * self._multiplier):0.6f} {self._format}, " + f"N: {self._n}", + ) + + """ + Static Methods + """ + + @staticmethod + def get_timer_info(name: str) -> float: + """Retrieves the time logged in the global dictionary + based on name. + + Args: + name: Name of the the entry to be retrieved. + + Raises: + TimerError: If name doesn't exist in the log. + + Returns: + A float containing the time logged if the name exists. + """ + if name not in Timer.timing_info: + raise TimerError(f"Timer {name} does not exist") + return Timer.timing_info.get(name)["last"] + + @staticmethod + def get_timer_statistics(name: str) -> dict[str, float]: + """Retrieves the time logged in the global dictionary + based on name. + + Returns: + A dictionary containing the time logged for all timers. + """ + + if name not in Timer.timing_info: + raise TimerError(f"Timer {name} does not exist") + + keys = ["mean", "std", "n"] + + return {k: Timer.timing_info[name][k] for k in keys} diff --git a/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py b/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py new file mode 100644 index 000000000000..c7a9d8c525f1 --- /dev/null +++ b/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py @@ -0,0 +1,43 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp CUDA graph capture-or-replay utility.""" + +from collections.abc import Callable +from typing import Any + +import warp as wp + + +class WarpGraphCache: + """Caches Warp CUDA graphs by stage name: captures on first call, replays after. + + Usage:: + + cache = WarpGraphCache() + cache.capture_or_replay("my_stage", my_warp_function) + # uncaptured work here ... + cache.capture_or_replay("my_stage_post", my_other_function) + """ + + def __init__(self): + self._graphs: dict[str, Any] = {} + + def capture_or_replay(self, stage: str, fn: Callable[[], Any]) -> None: + """Capture *fn* into a CUDA graph on the first call, then replay.""" + graph = self._graphs.get(stage) + if graph is None: + with wp.ScopedCapture() as capture: + fn() + self._graphs[stage] = capture.graph + else: + wp.capture_launch(graph) + + def invalidate(self, stage: str | None = None) -> None: + """Drop cached graph(s). If *stage* is ``None``, drop all.""" + if stage is None: + self._graphs.clear() + else: + self._graphs.pop(stage, None) diff --git a/source/isaaclab_experimental/pyproject.toml b/source/isaaclab_experimental/pyproject.toml new file mode 100644 index 000000000000..d90ac3536f16 --- /dev/null +++ b/source/isaaclab_experimental/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel", "toml"] +build-backend = "setuptools.build_meta" diff --git a/source/isaaclab_experimental/setup.py b/source/isaaclab_experimental/setup.py new file mode 100644 index 000000000000..c84086f0fad1 --- /dev/null +++ b/source/isaaclab_experimental/setup.py @@ -0,0 +1,53 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Installation script for the 'isaaclab_experimental' python package.""" + +import os + +import toml +from setuptools import find_packages, setup + +# Obtain the extension data from the extension.toml file +EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__)) +# Read the extension.toml file +EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml")) + +# Minimum dependencies required prior to installation +INSTALL_REQUIRES = [ + # generic + "numpy>2", + "warp-lang>=1.9.0.dev20250825", # TODO: update to 1.11.0 + "torch>=2.7", + "prettytable==3.3.0", + "toml", +] + + +PYTORCH_INDEX_URL = ["https://download.pytorch.org/whl/cu118"] + +# Installation operation +setup( + name="isaaclab_experimental", + author="Isaac Lab Project Developers", + maintainer="Isaac Lab Project Developers", + url=EXTENSION_TOML_DATA["package"]["repository"], + version=EXTENSION_TOML_DATA["package"]["version"], + description=EXTENSION_TOML_DATA["package"]["description"], + keywords=EXTENSION_TOML_DATA["package"]["keywords"], + license="BSD-3-Clause", + include_package_data=True, + python_requires=">=3.10", + install_requires=INSTALL_REQUIRES, + dependency_links=PYTORCH_INDEX_URL, + packages=find_packages(), + classifiers=[ + "Natural Language :: English", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Isaac Sim :: 5.0.0", + ], + zip_safe=False, +) diff --git a/source/isaaclab_newton/setup.py b/source/isaaclab_newton/setup.py index 2ae60338168d..8ac8b5700159 100644 --- a/source/isaaclab_newton/setup.py +++ b/source/isaaclab_newton/setup.py @@ -22,7 +22,7 @@ # newton "mujoco==3.5.0", "mujoco-warp==3.5.0.2", - "newton==1.0.0rc1", + "newton==1.0.0rc3", "PyOpenGL-accelerate==3.1.10", ] diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py index e7e52ca43a96..7e753c0f7a2a 100644 --- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py +++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py @@ -47,7 +47,16 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N # NOTE: import here (not at module level) to avoid loading heavy env classes before Isaac Sim is initialized. from isaaclab.envs import DirectRLEnv, ManagerBasedEnv, ManagerBasedRLEnv - if not isinstance(env.unwrapped, (ManagerBasedRLEnv, ManagerBasedEnv, DirectRLEnv)): + try: + from isaaclab_experimental.envs import DirectRLEnvWarp + except ImportError: + DirectRLEnvWarp = None + + allowed_types = (ManagerBasedRLEnv, ManagerBasedEnv, DirectRLEnv) + if DirectRLEnvWarp is not None: + allowed_types += (DirectRLEnvWarp,) + + if not isinstance(env.unwrapped, allowed_types): raise ValueError( "The environment must be inherited from ManagerBasedRLEnv or DirectRLEnv. Environment type:" f" {type(env)}" diff --git a/source/isaaclab_tasks_experimental/config/extension.toml b/source/isaaclab_tasks_experimental/config/extension.toml new file mode 100644 index 000000000000..75de8d6da0ef --- /dev/null +++ b/source/isaaclab_tasks_experimental/config/extension.toml @@ -0,0 +1,22 @@ +[package] + +# Note: Semantic Versioning is used: https://semver.org/ +version = "0.0.1" + +# Description +title = "Experimental environments for IsaacLab" +description="Extension containing suite of experimental environments for robot learning." +readme = "docs/README.md" +repository = "https://github.com/isaac-sim/IsaacLab" +category = "robotics" +keywords = ["robotics", "rl", "il", "learning"] + +[dependencies] +"isaaclab" = {} +"isaaclab_assets" = {} + +[core] +reloadable = false + +[[python.module]] +name = "isaaclab_tasks.experimental" diff --git a/source/isaaclab_tasks_experimental/docs/README.md b/source/isaaclab_tasks_experimental/docs/README.md new file mode 100644 index 000000000000..d9e681518d64 --- /dev/null +++ b/source/isaaclab_tasks_experimental/docs/README.md @@ -0,0 +1,3 @@ +# Isaac Lab: Experimental Environment Suite + +Experimental environments for robot learning built on top of Isaac Lab. diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py new file mode 100644 index 000000000000..918c41d73d7b --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py @@ -0,0 +1,32 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Package containing task implementations for various robotic environments.""" + +import os +import toml + +# Conveniences to other module directories via relative paths +ISAACLAB_TASKS_EXPERIMENTAL_EXT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../")) +"""Path to the extension source directory.""" + +ISAACLAB_TASKS_EXPERIMENTAL_METADATA = toml.load( + os.path.join(ISAACLAB_TASKS_EXPERIMENTAL_EXT_DIR, "config", "extension.toml") +) +"""Extension metadata dictionary parsed from the extension.toml file.""" + +# Configure the module-level variables +__version__ = ISAACLAB_TASKS_EXPERIMENTAL_METADATA["package"]["version"] + +## +# Register Gym environments. +## + +from isaaclab_tasks.utils import import_packages + +# The blacklist is used to prevent importing configs from sub-packages +_BLACKLIST_PKGS = ["utils", ".mdp"] +# Import all configs in this package +import_packages(__name__, _BLACKLIST_PKGS) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py new file mode 100644 index 000000000000..3e2b7945ebde --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Direct workflow environments. +""" + +import gymnasium as gym diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py new file mode 100644 index 000000000000..c954081b9c54 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py @@ -0,0 +1,29 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Allegro Inhand Manipulation environment. +""" + +import gymnasium as gym + +## +# Register Gym environments. +## + +inhand_task_entry = "isaaclab_tasks_experimental.direct.inhand_manipulation" +stable_agents = "isaaclab_tasks.direct.allegro_hand.agents" + +gym.register( + id="Isaac-Repose-Cube-Allegro-Direct-Warp-v0", + entry_point=f"{inhand_task_entry}.inhand_manipulation_warp_env:InHandManipulationWarpEnv", + disable_env_checker=True, + kwargs={ + "env_cfg_entry_point": f"{__name__}.allegro_hand_warp_env_cfg:AllegroHandWarpEnvCfg", + "rl_games_cfg_entry_point": f"{stable_agents}:rl_games_ppo_cfg.yaml", + "rsl_rl_cfg_entry_point": f"{stable_agents}.rsl_rl_ppo_cfg:AllegroHandPPORunnerCfg", + "skrl_cfg_entry_point": f"{stable_agents}:skrl_ppo_cfg.yaml", + }, +) diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py new file mode 100644 index 000000000000..2d607d475329 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py @@ -0,0 +1,136 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + + +from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg + +import isaaclab.sim as sim_utils +from isaaclab.assets import ArticulationCfg # , RigidObjectCfg +from isaaclab.envs import DirectRLEnvCfg +from isaaclab.markers import VisualizationMarkersCfg +from isaaclab.scene import InteractiveSceneCfg +from isaaclab.sim import SimulationCfg +from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg +from isaaclab.utils import configclass +from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR + +from isaaclab_assets.robots.allegro import ALLEGRO_HAND_CFG + + +@configclass +class AllegroHandWarpEnvCfg(DirectRLEnvCfg): + # env + decimation = 4 + episode_length_s = 10.0 + action_space = 16 + observation_space = 124 # (full) + state_space = 0 + asymmetric_obs = False + obs_type = "full" + + solver_cfg = MJWarpSolverCfg( + solver="newton", + integrator="implicitfast", + njmax=80, + nconmax=70, + impratio=10.0, + cone="elliptic", + update_data_interval=2, + iterations=100, + ls_iterations=15, + ls_parallel=False, + # save_to_mjcf="AllegroHand.xml", + ) + + newton_cfg = NewtonCfg( + solver_cfg=solver_cfg, + num_substeps=2, + debug_mode=False, + ) + # simulation + sim: SimulationCfg = SimulationCfg( + dt=1 / 120, + render_interval=decimation, + physics_material=RigidBodyMaterialCfg( + static_friction=1.0, + dynamic_friction=1.0, + ), + physics=newton_cfg, + ) + # robot + robot_cfg: ArticulationCfg = ALLEGRO_HAND_CFG.replace(prim_path="/World/envs/env_.*/Robot") + + actuated_joint_names = [ + "index_joint_0", + "middle_joint_0", + "ring_joint_0", + "thumb_joint_0", + "index_joint_1", + "index_joint_2", + "index_joint_3", + "middle_joint_1", + "middle_joint_2", + "middle_joint_3", + "ring_joint_1", + "ring_joint_2", + "ring_joint_3", + "thumb_joint_1", + "thumb_joint_2", + "thumb_joint_3", + ] + fingertip_body_names = [ + "index_link_3", + "middle_link_3", + "ring_link_3", + "thumb_link_3", + ] + + # in-hand object + object_cfg: ArticulationCfg = ArticulationCfg( + prim_path="/World/envs/env_.*/object", + spawn=sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd", + mass_props=sim_utils.MassPropertiesCfg(density=400.0), + scale=(1.2, 1.2, 1.2), + ), + # FIXME: it does seem to be a bug for ArticulationCfg for handling empty joint list + init_state=ArticulationCfg.InitialStateCfg( + pos=(0.0, -0.17, 0.565), rot=(0.0, 0.0, 0.0, 1.0), joint_pos={}, joint_vel={} + ), + actuators={}, + articulation_root_prim_path="", + ) + # goal object + goal_object_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg( + prim_path="/Visuals/goal_marker", + markers={ + "goal": sim_utils.UsdFileCfg( + usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd", + scale=(1.2, 1.2, 1.2), + ) + }, + ) + # scene + scene: InteractiveSceneCfg = InteractiveSceneCfg( + num_envs=8192, env_spacing=0.75, replicate_physics=True, clone_in_fabric=True + ) + # reset + reset_position_noise = 0.01 # range of position at reset + reset_dof_pos_noise = 0.2 # range of dof pos at reset + reset_dof_vel_noise = 0.0 # range of dof vel at reset + # reward scales + dist_reward_scale = -10.0 + rot_reward_scale = 1.0 + rot_eps = 0.1 + action_penalty_scale = -0.0002 + reach_goal_bonus = 250 + fall_penalty = 0 + fall_dist = 0.24 + vel_obs_scale = 0.2 + success_tolerance = 0.2 + max_consecutive_success = 0 + av_factor = 0.1 + act_moving_average = 1.0 + force_torque_obs_scale = 10.0 diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py new file mode 100644 index 000000000000..460a30569089 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py new file mode 100644 index 000000000000..fca92ac1e514 --- /dev/null +++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py @@ -0,0 +1,980 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + + +from __future__ import annotations + +from collections.abc import Sequence +from typing import TYPE_CHECKING + +import torch +import warp as wp +from isaaclab_experimental.envs import DirectRLEnvWarp + +import isaaclab.sim as sim_utils +from isaaclab.assets import Articulation # , RigidObject +from isaaclab.markers import VisualizationMarkers +from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane + +if TYPE_CHECKING: + from isaaclab_tasks_experimental.direct.allegro_hand.allegro_hand_warp_env_cfg import AllegroHandWarpEnvCfg + + +@wp.kernel +def initialize_rng_state( + # input + seed: wp.int32, + # output + state: wp.array(dtype=wp.uint32), +): + env_id = wp.tid() + state[env_id] = wp.rand_init(seed, wp.int32(env_id)) + + +@wp.kernel +def apply_actions_to_targets( + # input + actions: wp.array2d(dtype=wp.float32), + lower_limits: wp.array2d(dtype=wp.float32), + upper_limits: wp.array2d(dtype=wp.float32), + actuated_dof_indices: wp.array(dtype=wp.int32), + act_moving_average: wp.float32, + # input/output + prev_targets: wp.array2d(dtype=wp.float32), + # output + cur_targets: wp.array2d(dtype=wp.float32), +): + env_id, i = wp.tid() + dof_id = actuated_dof_indices[i] + + # clamp and scale action to target range + a = wp.clamp(actions[env_id, i], wp.float32(-1.0), wp.float32(1.0)) + lower = lower_limits[env_id, dof_id] + upper = upper_limits[env_id, dof_id] + t = scale(a, lower, upper) + + # smoothing and boundary clamping + t = act_moving_average * t + (wp.float32(1.0) - act_moving_average) * prev_targets[env_id, dof_id] + t = wp.clamp(t, lower, upper) + + # update targets + cur_targets[env_id, dof_id] = t + prev_targets[env_id, dof_id] = t + + +@wp.kernel +def reset_target_pose( + # input + env_mask: wp.array(dtype=wp.bool), + x_unit_vec: wp.vec3f, + y_unit_vec: wp.vec3f, + env_origins: wp.array(dtype=wp.vec3f), + goal_pos: wp.array(dtype=wp.vec3f), + # input/output + rng_state: wp.array(dtype=wp.uint32), + # output + goal_rot: wp.array(dtype=wp.quatf), + reset_goal_buf: wp.array(dtype=wp.bool), + goal_pos_w: wp.array(dtype=wp.vec3f), +): + env_id = wp.tid() + if env_mask[env_id]: + rand0 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + rand1 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + + goal_rot[env_id] = randomize_rotation(rand0, rand1, x_unit_vec, y_unit_vec) + reset_goal_buf[env_id] = False + + # Warp-native addition: goal position in world frame. + goal_pos_w[env_id] = goal_pos[env_id] + env_origins[env_id] + + +@wp.kernel +def reset_object( + # input + default_root_pose: wp.array(dtype=wp.transformf), + env_origins: wp.array(dtype=wp.vec3f), + reset_position_noise: wp.float32, + x_unit_vec: wp.vec3f, + y_unit_vec: wp.vec3f, + env_mask: wp.array(dtype=wp.bool), + # input/output + rng_state: wp.array(dtype=wp.uint32), + # output + root_pose_w: wp.array(dtype=wp.transformf), + root_vel_w: wp.array(dtype=wp.spatial_vectorf), +): + env_id = wp.tid() + if env_mask[env_id]: + nx = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + ny = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + nz = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + + pos_noise = reset_position_noise * wp.vec3f(nx, ny, nz) + base_pos = wp.transform_get_translation(default_root_pose[env_id]) + pos_w = base_pos + env_origins[env_id] + pos_noise + + rand0 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + rand1 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + rot_w = randomize_rotation(rand0, rand1, x_unit_vec, y_unit_vec) + + # The following should be equivalent, but consider using write_root_pose_to_sim and write_root_velocity_to_sim + root_pose_w[env_id] = wp.transform(pos_w, rot_w) + root_vel_w[env_id] = wp.spatial_vectorf( + wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0) + ) + + +@wp.kernel +def reset_hand( + # input + default_joint_pos: wp.array2d(dtype=wp.float32), + default_joint_vel: wp.array2d(dtype=wp.float32), + lower_limits: wp.array2d(dtype=wp.float32), + upper_limits: wp.array2d(dtype=wp.float32), + reset_dof_pos_noise: wp.float32, + reset_dof_vel_noise: wp.float32, + env_mask: wp.array(dtype=wp.bool), + num_dofs: wp.int32, + # input/output + rng_state: wp.array(dtype=wp.uint32), + # output + joint_pos: wp.array2d(dtype=wp.float32), + joint_vel: wp.array2d(dtype=wp.float32), + prev_targets: wp.array2d(dtype=wp.float32), + cur_targets: wp.array2d(dtype=wp.float32), + hand_dof_targets: wp.array2d(dtype=wp.float32), +): + env_id = wp.tid() + if env_mask[env_id]: + # Each env runs sequentially inside this kernel (avoids RNG races across DOFs). + for dof_id in range(num_dofs): + dof_pos_noise = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + + delta_max = upper_limits[env_id, dof_id] - default_joint_pos[env_id, dof_id] + delta_min = lower_limits[env_id, dof_id] - default_joint_pos[env_id, dof_id] + rand_delta = delta_min + (delta_max - delta_min) * 0.5 * dof_pos_noise + pos = default_joint_pos[env_id, dof_id] + reset_dof_pos_noise * rand_delta + + dof_vel_noise = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0)) + rng_state[env_id] += wp.uint32(1) + vel = default_joint_vel[env_id, dof_id] + reset_dof_vel_noise * dof_vel_noise + + # The following lines should be equivalent to the following: + # self.hand.write_joint_state_to_sim(dof_pos, dof_vel, env_ids=env_ids) + joint_pos[env_id, dof_id] = pos + joint_vel[env_id, dof_id] = vel + + prev_targets[env_id, dof_id] = pos + cur_targets[env_id, dof_id] = pos + hand_dof_targets[env_id, dof_id] = pos + + +@wp.kernel +def reset_successes( + # input + env_mask: wp.array(dtype=wp.bool), + # output + successes: wp.array(dtype=wp.float32), +): + env_id = wp.tid() + if env_mask[env_id]: + successes[env_id] = wp.float32(0.0) + + +@wp.kernel +def compute_intermediate_values( + # input + body_pos_w: wp.array2d(dtype=wp.vec3f), + body_quat_w: wp.array2d(dtype=wp.quatf), + body_vel_w: wp.array2d(dtype=wp.spatial_vectorf), + finger_bodies: wp.array(dtype=wp.int32), + env_origins: wp.array(dtype=wp.vec3f), + object_root_pose_w: wp.array(dtype=wp.transformf), + object_root_vel_w: wp.array(dtype=wp.spatial_vectorf), + num_fingertips: wp.int32, + # output + fingertip_pos: wp.array2d(dtype=wp.vec3f), + fingertip_rot: wp.array2d(dtype=wp.quatf), + fingertip_velocities: wp.array2d(dtype=wp.spatial_vectorf), + object_pose: wp.array(dtype=wp.transformf), + object_vels: wp.array(dtype=wp.spatial_vectorf), +): + env_id = wp.tid() + + for i in range(num_fingertips): + body_id = finger_bodies[i] + fingertip_pos[env_id, i] = body_pos_w[env_id, body_id] - env_origins[env_id] + fingertip_rot[env_id, i] = body_quat_w[env_id, body_id] + fingertip_velocities[env_id, i] = body_vel_w[env_id, body_id] + + # Store object pose in env-local frame (translation only; orientation unchanged). + pos_w = wp.transform_get_translation(object_root_pose_w[env_id]) + pos = pos_w - env_origins[env_id] + rot = wp.transform_get_rotation(object_root_pose_w[env_id]) + object_pose[env_id] = wp.transform(pos, rot) + object_vels[env_id] = object_root_vel_w[env_id] + + +@wp.kernel +def get_dones( + # input + max_episode_length: wp.int32, + object_pose: wp.array(dtype=wp.transformf), + in_hand_pos: wp.array(dtype=wp.vec3f), + goal_rot: wp.array(dtype=wp.quatf), + fall_dist: wp.float32, + success_tolerance: wp.float32, + max_consecutive_success: wp.int32, + successes: wp.array(dtype=wp.float32), + # input/output + episode_length_buf: wp.array(dtype=wp.int32), + # output + out_of_reach: wp.array(dtype=wp.bool), + time_out: wp.array(dtype=wp.bool), + reset: wp.array(dtype=wp.bool), +): + env_id = wp.tid() + + object_pos = wp.transform_get_translation(object_pose[env_id]) + object_rot = wp.transform_get_rotation(object_pose[env_id]) + + goal_dist = wp.length(object_pos - in_hand_pos[env_id]) + out_of_reach[env_id] = goal_dist >= fall_dist + + max_success_reached = False + if max_consecutive_success > 0: + # Reset progress (episode length buf) on goal envs if max_consecutive_success > 0 + rot_dist = rotation_distance(object_rot, goal_rot[env_id]) + if wp.abs(rot_dist) <= success_tolerance: + episode_length_buf[env_id] = 0 + max_success_reached = successes[env_id] >= wp.float32(max_consecutive_success) + + time_out[env_id] = episode_length_buf[env_id] >= (max_episode_length - 1) or max_success_reached + reset[env_id] = out_of_reach[env_id] or time_out[env_id] + + +@wp.kernel +def compute_reduced_observations( + # input + fingertip_pos: wp.array2d(dtype=wp.vec3f), + object_pose: wp.array(dtype=wp.transformf), + goal_rot: wp.array(dtype=wp.quatf), + actions: wp.array2d(dtype=wp.float32), + num_fingertips: wp.int32, + action_dim: wp.int32, + # output + observations: wp.array2d(dtype=wp.float32), +): + env_id = wp.tid() + + obj_pos = wp.transform_get_translation(object_pose[env_id]) + obj_rot = wp.transform_get_rotation(object_pose[env_id]) + + idx = int(0) + for i in range(num_fingertips): + observations[env_id, idx + 0] = fingertip_pos[env_id, i][0] + observations[env_id, idx + 1] = fingertip_pos[env_id, i][1] + observations[env_id, idx + 2] = fingertip_pos[env_id, i][2] + idx += 3 + + observations[env_id, idx + 0] = obj_pos[0] + observations[env_id, idx + 1] = obj_pos[1] + observations[env_id, idx + 2] = obj_pos[2] + idx += 3 + + rel = obj_rot * wp.quat_inverse(goal_rot[env_id]) + observations[env_id, idx + 0] = rel[0] + observations[env_id, idx + 1] = rel[1] + observations[env_id, idx + 2] = rel[2] + observations[env_id, idx + 3] = rel[3] + idx += 4 + + for i in range(action_dim): + observations[env_id, idx + i] = actions[env_id, i] + + +@wp.kernel +def compute_full_observations( + # input + hand_dof_pos: wp.array2d(dtype=wp.float32), + hand_dof_vel: wp.array2d(dtype=wp.float32), + hand_dof_lower_limits: wp.array2d(dtype=wp.float32), + hand_dof_upper_limits: wp.array2d(dtype=wp.float32), + vel_obs_scale: wp.float32, + object_pose: wp.array(dtype=wp.transformf), + object_vels: wp.array(dtype=wp.spatial_vectorf), + in_hand_pos: wp.array(dtype=wp.vec3f), + goal_rot: wp.array(dtype=wp.quatf), + fingertip_pos: wp.array2d(dtype=wp.vec3f), + fingertip_rot: wp.array2d(dtype=wp.quatf), + fingertip_velocities: wp.array2d(dtype=wp.spatial_vectorf), + actions: wp.array2d(dtype=wp.float32), + num_hand_dofs: wp.int32, + num_fingertips: wp.int32, + action_dim: wp.int32, + # output + observations: wp.array2d(dtype=wp.float32), +): + env_id = wp.tid() + + # hand + for i in range(num_hand_dofs): + observations[env_id, i] = unscale( + hand_dof_pos[env_id, i], hand_dof_lower_limits[env_id, i], hand_dof_upper_limits[env_id, i] + ) + + offset = num_hand_dofs + for i in range(num_hand_dofs): + observations[env_id, offset + i] = vel_obs_scale * hand_dof_vel[env_id, i] + offset += num_hand_dofs + + # object + obj_pos = wp.transform_get_translation(object_pose[env_id]) + obj_rot = wp.transform_get_rotation(object_pose[env_id]) + + observations[env_id, offset + 0] = obj_pos[0] + observations[env_id, offset + 1] = obj_pos[1] + observations[env_id, offset + 2] = obj_pos[2] + offset += 3 + + observations[env_id, offset + 0] = obj_rot[0] + observations[env_id, offset + 1] = obj_rot[1] + observations[env_id, offset + 2] = obj_rot[2] + observations[env_id, offset + 3] = obj_rot[3] + offset += 4 + + # spatial_vectorf layout: [0:3]=angular, [3:6]=linear + # torch reference order: linear (unscaled) first, then angular (scaled) + observations[env_id, offset + 0] = object_vels[env_id][3] + observations[env_id, offset + 1] = object_vels[env_id][4] + observations[env_id, offset + 2] = object_vels[env_id][5] + offset += 3 + + observations[env_id, offset + 0] = vel_obs_scale * object_vels[env_id][0] + observations[env_id, offset + 1] = vel_obs_scale * object_vels[env_id][1] + observations[env_id, offset + 2] = vel_obs_scale * object_vels[env_id][2] + offset += 3 + + # goal + observations[env_id, offset + 0] = in_hand_pos[env_id][0] + observations[env_id, offset + 1] = in_hand_pos[env_id][1] + observations[env_id, offset + 2] = in_hand_pos[env_id][2] + offset += 3 + + observations[env_id, offset + 0] = goal_rot[env_id][0] + observations[env_id, offset + 1] = goal_rot[env_id][1] + observations[env_id, offset + 2] = goal_rot[env_id][2] + observations[env_id, offset + 3] = goal_rot[env_id][3] + offset += 4 + + rel = obj_rot * wp.quat_inverse(goal_rot[env_id]) + observations[env_id, offset + 0] = rel[0] + observations[env_id, offset + 1] = rel[1] + observations[env_id, offset + 2] = rel[2] + observations[env_id, offset + 3] = rel[3] + offset += 4 + + # fingertips + for i in range(num_fingertips): + observations[env_id, offset + 0] = fingertip_pos[env_id, i][0] + observations[env_id, offset + 1] = fingertip_pos[env_id, i][1] + observations[env_id, offset + 2] = fingertip_pos[env_id, i][2] + offset += 3 + + for i in range(num_fingertips): + observations[env_id, offset + 0] = fingertip_rot[env_id, i][0] + observations[env_id, offset + 1] = fingertip_rot[env_id, i][1] + observations[env_id, offset + 2] = fingertip_rot[env_id, i][2] + observations[env_id, offset + 3] = fingertip_rot[env_id, i][3] + offset += 4 + + for i in range(num_fingertips): + for j in range(6): + observations[env_id, offset + j] = fingertip_velocities[env_id, i][j] + offset += 6 + + # actions + for i in range(action_dim): + observations[env_id, offset + i] = actions[env_id, i] + + +@wp.kernel +def sanitize_and_print_once( + # input/output + obs: wp.array(dtype=wp.float32), + printed_flag: wp.array(dtype=wp.int32), +): + i = wp.tid() + v = obs[i] + + if not wp.isfinite(v): + # Try to claim the "print token" + if wp.atomic_cas(printed_flag, 0, 0, 1) == 0: + wp.printf("Non-finite values in observations") + + obs[i] = wp.float32(0.0) + + +@wp.kernel +def compute_rewards( + # input + reset_buf: wp.array(dtype=wp.bool), + object_pose: wp.array(dtype=wp.transformf), + target_pos: wp.array(dtype=wp.vec3f), + target_rot: wp.array(dtype=wp.quatf), + dist_reward_scale: wp.float32, + rot_reward_scale: wp.float32, + rot_eps: wp.float32, + actions: wp.array2d(dtype=wp.float32), + action_penalty_scale: wp.float32, + success_tolerance: wp.float32, + reach_goal_bonus: wp.float32, + fall_dist: wp.float32, + fall_penalty: wp.float32, + action_dim: wp.int32, + # input/output + reset_goal_buf: wp.array(dtype=wp.bool), + successes: wp.array(dtype=wp.float32), + num_resets_out: wp.array(dtype=wp.float32), + finished_cons_successes_out: wp.array(dtype=wp.float32), + # output + reward_out: wp.array(dtype=wp.float32), +): + env_id = wp.tid() + + obj_pos = wp.transform_get_translation(object_pose[env_id]) + obj_rot = wp.transform_get_rotation(object_pose[env_id]) + + goal_dist = wp.length(obj_pos - target_pos[env_id]) + rot_dist = rotation_distance(obj_rot, target_rot[env_id]) + + dist_rew = goal_dist * dist_reward_scale + rot_rew = wp.float32(1.0) / (wp.abs(rot_dist) + rot_eps) * rot_reward_scale + + action_penalty = wp.float32(0.0) + for i in range(action_dim): + action_penalty += actions[env_id, i] * actions[env_id, i] + + # Total reward is: position distance + orientation alignment + action regularization + success bonus + fall penalty + reward = dist_rew + rot_rew + action_penalty * action_penalty_scale + + # Find out which envs hit the goal and update successes count + reached = wp.abs(rot_dist) <= success_tolerance + goal_resets = reached or reset_goal_buf[env_id] + reset_goal_buf[env_id] = goal_resets + if goal_resets: + successes[env_id] = successes[env_id] + wp.float32(1.0) + + # Success bonus: orientation is within `success_tolerance` of goal orientation + if goal_resets: + reward = reward + reach_goal_bonus + + # Fall penalty: distance to the goal is larger than a threshold + if goal_dist >= fall_dist: + reward = reward + fall_penalty + + # Consecutive-successes stats (mirrors Torch env): + # resets = torch.where(goal_dist >= fall_dist, ones_like(reset_buf), reset_buf) + resets = (goal_dist >= fall_dist) or reset_buf[env_id] + if resets: + wp.atomic_add(num_resets_out, 0, wp.float32(1.0)) + wp.atomic_add(finished_cons_successes_out, 0, successes[env_id]) + + reward_out[env_id] = reward + + +@wp.kernel +def update_consecutive_successes_from_stats( + # input + num_resets: wp.array(dtype=wp.float32), + finished_cons_successes: wp.array(dtype=wp.float32), + av_factor: wp.float32, + # input/output + consecutive_successes: wp.array(dtype=wp.float32), +): + """Finalize the Torch env's EMA update for consecutive_successes and clear the accumulators.""" + # single-thread kernel (dim=1) + n = num_resets[0] + prev = consecutive_successes[0] + if n > wp.float32(0.0): + consecutive_successes[0] = av_factor * (finished_cons_successes[0] / n) + (wp.float32(1.0) - av_factor) * prev + + +@wp.func +def scale(x: wp.float32, lower: wp.float32, upper: wp.float32) -> wp.float32: + return wp.float32(0.5) * (x + wp.float32(1.0)) * (upper - lower) + lower + + +@wp.func +def unscale(x: wp.float32, lower: wp.float32, upper: wp.float32) -> wp.float32: + return (wp.float32(2.0) * x - upper - lower) / (upper - lower) + + +@wp.func +def randomize_rotation(rand0: wp.float32, rand1: wp.float32, x_axis: wp.vec3f, y_axis: wp.vec3f) -> wp.quatf: + return wp.quat_from_axis_angle(x_axis, rand0 * wp.pi) * wp.quat_from_axis_angle(y_axis, rand1 * wp.pi) + + +@wp.func +def rotation_distance(object_rot: wp.quatf, target_rot: wp.quatf) -> wp.float32: + # Orientation alignment for the cube in hand and goal cube + quat_diff = object_rot * wp.quat_inverse(target_rot) + # Match Torch env convention: uses indices [1:4] for the vector part (see `rotation_distance` in Torch env). + v_norm = wp.sqrt(quat_diff[1] * quat_diff[1] + quat_diff[2] * quat_diff[2] + quat_diff[3] * quat_diff[3]) + v_norm = wp.min(v_norm, wp.float32(1.0)) + return wp.float32(2.0) * wp.asin(v_norm) + + +class InHandManipulationWarpEnv(DirectRLEnvWarp): + cfg: AllegroHandWarpEnvCfg # | ShadowHandWarpEnvCfg + + # def __init__(self, cfg: AllegroHandWarpEnvCfg | ShadowHandWarpEnvCfg, render_mode: str | None = None, **kwargs): + def __init__(self, cfg: AllegroHandWarpEnvCfg, render_mode: str | None = None, **kwargs): + super().__init__(cfg, render_mode, **kwargs) + + # --------------------------------------------------------------------- + # Constants + # --------------------------------------------------------------------- + + # dof used for joint related init and sample + self.num_hand_dofs = self.hand.num_joints + + # list of actuated joints + actuated_dof_indices: list[int] = list() + for joint_name in cfg.actuated_joint_names: + actuated_dof_indices.append(self.hand.joint_names.index(joint_name)) + actuated_dof_indices.sort() + self.num_actuated_dofs = len(actuated_dof_indices) + + # Warp index/mask helpers for kernels and articulation APIs. + self.actuated_dof_indices = wp.array(actuated_dof_indices, dtype=wp.int32, device=self.device) + actuated_mask = [False] * self.num_hand_dofs + for idx in actuated_dof_indices: + actuated_mask[idx] = True + self.actuated_dof_mask = wp.array(actuated_mask, dtype=wp.bool, device=self.device) + + # finger bodies + finger_bodies: list[int] = list() + for body_name in self.cfg.fingertip_body_names: + finger_bodies.append(self.hand.body_names.index(body_name)) + finger_bodies.sort() + self.num_fingertips = len(finger_bodies) + self.finger_bodies = wp.array(finger_bodies, dtype=wp.int32, device=self.device) + + # joint limits + self.hand_dof_lower_limits = self.hand.data.joint_pos_limits_lower + self.hand_dof_upper_limits = self.hand.data.joint_pos_limits_upper + + # unit vectors + self.x_unit_vec = wp.vec3f(1.0, 0.0, 0.0) + self.y_unit_vec = wp.vec3f(0.0, 1.0, 0.0) + self.z_unit_vec = wp.vec3f(0.0, 0.0, 1.0) + + # Per-env origins (Warp view for kernels; Torch env uses `self.scene.env_origins` directly). + self.env_origins = wp.from_torch(self.scene.env_origins, dtype=wp.vec3f) + + # --------------------------------------------------------------------- + # Warp buffers + # --------------------------------------------------------------------- + + # buffers for position targets + self.hand_dof_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device) + self.prev_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device) + self.cur_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device) + + # track goal resets + self.reset_goal_buf = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device) + # used to compare object position + self.in_hand_pos = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device) + # default goal positions + self.goal_rot = wp.zeros(self.num_envs, dtype=wp.quatf, device=self.device) + self.goal_pos = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device) + self.goal_pos_w = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device) + + # Initialize goal constants from Torch (avoid a one-off kernel launch). + default_root_pose = wp.to_torch(self.object.data.default_root_pose).to(self.device) + in_hand_pos = default_root_pose[:, 0:3].clone() + in_hand_pos[:, 2] -= 0.04 + self.in_hand_pos.assign(wp.from_torch(in_hand_pos, dtype=wp.vec3f)) + + goal_pos = torch.tensor([-0.2, -0.45, 0.68], device=self.device, dtype=torch.float32).repeat((self.num_envs, 1)) + self.goal_pos.assign(wp.from_torch(goal_pos, dtype=wp.vec3f)) + + goal_rot = torch.zeros((self.num_envs, 4), device=self.device, dtype=torch.float32) + goal_rot[:, 3] = 1.0 # (x, y, z, w) + self.goal_rot.assign(wp.from_torch(goal_rot, dtype=wp.quatf)) + + # initialize goal marker + self.goal_markers = VisualizationMarkers(self.cfg.goal_object_cfg) + + # Reduction buffers for consecutive_successes update (Warp-only). + self._num_resets = wp.zeros(1, dtype=wp.float32, device=self.device) + self._finished_cons_successes = wp.zeros(1, dtype=wp.float32, device=self.device) + # track successes + self.successes = wp.zeros(self.num_envs, dtype=wp.float32, device=self.device) + self.consecutive_successes = wp.zeros(1, dtype=wp.float32, device=self.device) + + # Persistent RL buffers (Warp). + self.actions = wp.zeros((self.num_envs, self.cfg.action_space), dtype=wp.float32, device=self.device) + self.observations = wp.zeros((self.num_envs, self.cfg.observation_space), dtype=wp.float32, device=self.device) + self.rewards = wp.zeros((self.num_envs,), dtype=wp.float32, device=self.device) + # Flag used as a print token for non-finite observations (Warp). + self.obs_nonfinite_flag = wp.zeros(1, dtype=wp.int32, device=self.device) + + # Intermediate values (Warp) -- mirrors the Torch env's `_compute_intermediate_values` fields. + self.fingertip_pos = wp.zeros((self.num_envs, self.num_fingertips), dtype=wp.vec3f, device=self.device) + self.fingertip_rot = wp.zeros((self.num_envs, self.num_fingertips), dtype=wp.quatf, device=self.device) + self.fingertip_velocities = wp.zeros( + (self.num_envs, self.num_fingertips), dtype=wp.spatial_vectorf, device=self.device + ) + + self.object_pose = wp.zeros(self.num_envs, dtype=wp.transformf, device=self.device) + self.object_vels = wp.zeros(self.num_envs, dtype=wp.spatial_vectorf, device=self.device) + + # RNG state (per-env) for randomizations in reset/goal resets. + self.rng_state = wp.zeros(self.num_envs, dtype=wp.uint32, device=self.device) + if self.cfg.seed is None: + self.cfg.seed = -1 + wp.launch( + initialize_rng_state, + dim=self.num_envs, + inputs=[ + self.cfg.seed, + self.rng_state, + ], + device=self.device, + ) + + # --------------------------------------------------------------------- + # Torch views / aliases + # --------------------------------------------------------------------- + + # Bind torch buffers to warp buffers (same pattern as Warp Cartpole). + self.torch_obs_buf = wp.to_torch(self.observations) + self.torch_reward_buf = wp.to_torch(self.rewards) + self.torch_reset_terminated = wp.to_torch(self.reset_terminated) + self.torch_reset_time_outs = wp.to_torch(self.reset_time_outs) + self.torch_episode_length_buf = self.episode_length_buf # already a torch tensor via wp.to_torch + + def _setup_scene(self): + # add hand, in-hand object, and goal object + self.hand = Articulation(self.cfg.robot_cfg) + self.object = Articulation(self.cfg.object_cfg) + # add ground plane + spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg()) + # clone and replicate (no need to filter for this environment) + self.scene.clone_environments(copy_from_source=False) + # add articulation to scene - we must register to scene to randomize with EventManager + self.scene.articulations["robot"] = self.hand + self.scene.articulations["object"] = self.object + # add lights + light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75)) + light_cfg.func("/World/Light", light_cfg) + + def _pre_physics_step(self, actions: wp.array) -> None: + # Store actions in a persistent Warp buffer (analogous to `actions.clone()` in the Torch env). + wp.copy(self.actions, actions) + + def _apply_action(self) -> None: + wp.launch( + apply_actions_to_targets, + dim=(self.num_envs, self.num_actuated_dofs), + inputs=[ + self.actions, + self.hand_dof_lower_limits, + self.hand_dof_upper_limits, + self.actuated_dof_indices, + self.cfg.act_moving_average, + self.prev_targets, + self.cur_targets, + ], + device=self.device, + ) + + # Apply position targets using mask method (CUDA graph safe). + # All joints are actuated for Allegro, so default masks (None = all) are correct. + self.hand.set_joint_position_target_mask(target=self.cur_targets) + + def _get_observations(self) -> dict: + # if self.cfg.asymmetric_obs: + # self.fingertip_force_sensors = self.hand.root_physx_view.get_link_incoming_joint_force()[ + # :, self.finger_bodies + # ] + if self.cfg.obs_type == "openai": + self.compute_reduced_observations() + elif self.cfg.obs_type == "full": + self.compute_full_observations() + else: + raise ValueError(f"Unknown obs_type: {self.cfg.obs_type}") + return {"policy": self.torch_obs_buf} + + def _get_rewards(self) -> None: + # Clear reduction buffers before launching the reward kernel. + # wp.assign(self._num_resets, 0.0) + # wp.assign(self._finished_cons_successes, 0.0) + self._num_resets.zero_() + self._finished_cons_successes.zero_() + wp.launch( + compute_rewards, + dim=self.num_envs, + inputs=[ + self.reset_buf, + self.object_pose, + self.in_hand_pos, + self.goal_rot, + self.cfg.dist_reward_scale, + self.cfg.rot_reward_scale, + self.cfg.rot_eps, + self.actions, + self.cfg.action_penalty_scale, + self.cfg.success_tolerance, + self.cfg.reach_goal_bonus, + self.cfg.fall_dist, + self.cfg.fall_penalty, + self.cfg.action_space, + self.reset_goal_buf, + self.successes, + self._num_resets, + self._finished_cons_successes, + self.rewards, + ], + device=self.device, + ) + + # A separate kernel is needed as Warp does not support thread synchronization for reductions. + wp.launch( + update_consecutive_successes_from_stats, + dim=1, + inputs=[ + self._num_resets, + self._finished_cons_successes, + self.cfg.av_factor, + self.consecutive_successes, + ], + device=self.device, + ) + + if "log" not in self.extras: + self.extras["log"] = dict() + # .mean() cannot be called here as it causes problems on stream + self.extras["log"]["consecutive_successes"] = wp.to_torch(self.consecutive_successes) + + # Reset goals for envs that reached the target (mask is `reset_goal_buf`). + # This avoids Torch-side index extraction and keeps the step graphable. + self._reset_target_pose(mask=self.reset_goal_buf) + + def _get_dones(self) -> None: + self._compute_intermediate_values() + + wp.launch( + get_dones, + dim=self.num_envs, + inputs=[ + self.max_episode_length, + self.object_pose, + self.in_hand_pos, + self.goal_rot, + self.cfg.fall_dist, + self.cfg.success_tolerance, + self.cfg.max_consecutive_success, + self.successes, + self._episode_length_buf_wp, + self.reset_terminated, + self.reset_time_outs, + self.reset_buf, + ], + device=self.device, + ) + + def _reset_idx(self, mask: wp.array | None = None): + if mask is None: + mask = self._ALL_ENV_MASK + + # resets articulation and rigid body attributes + super()._reset_idx(mask) + + # reset goals + self._reset_target_pose(mask=mask) + + # reset object + wp.launch( + reset_object, + dim=self.num_envs, + inputs=[ + self.object.data.default_root_pose, + self.env_origins, + self.cfg.reset_position_noise, + self.x_unit_vec, + self.y_unit_vec, + mask, + self.rng_state, + self.object.data.root_link_pose_w, + self.object.data.root_com_vel_w, + ], + device=self.device, + ) + + # reset hand + wp.launch( + reset_hand, + dim=self.num_envs, + inputs=[ + self.hand.data.default_joint_pos, + self.hand.data.default_joint_vel, + self.hand_dof_lower_limits, + self.hand_dof_upper_limits, + self.cfg.reset_dof_pos_noise, + self.cfg.reset_dof_vel_noise, + mask, + self.num_hand_dofs, + self.rng_state, + self.hand.data.joint_pos, + self.hand.data.joint_vel, + self.prev_targets, + self.cur_targets, + self.hand_dof_targets, + ], + device=self.device, + ) + + self.hand.set_joint_position_target_mask(target=self.cur_targets, env_mask=mask) + + wp.launch( + reset_successes, + dim=self.num_envs, + inputs=[ + mask, + self.successes, + ], + device=self.device, + ) + + self._compute_intermediate_values() + + def _reset_target_pose(self, env_ids: Sequence[int] | None = None, mask: wp.array | None = None): + # reset goal rotation + if mask is None: + if env_ids is None: + return + env_mask_list = [False] * self.num_envs + for env_id in env_ids: + env_mask_list[int(env_id)] = True + mask = wp.array(env_mask_list, dtype=wp.bool, device=self.device) + + # update goal pose and markers + wp.launch( + reset_target_pose, + dim=self.num_envs, + inputs=[ + mask, + self.x_unit_vec, + self.y_unit_vec, + self.env_origins, + self.goal_pos, + self.rng_state, + self.goal_rot, + self.reset_goal_buf, + self.goal_pos_w, + ], + device=self.device, + ) + + def _post_step_visualize(self) -> None: + """Update goal markers outside CUDA graph scope.""" + self.goal_markers.visualize(wp.to_torch(self.goal_pos_w), wp.to_torch(self.goal_rot)) + + def _compute_intermediate_values(self): + # data for hand/object (Warp version of the Torch env's `_compute_intermediate_values`) + wp.launch( + compute_intermediate_values, + dim=self.num_envs, + inputs=[ + self.hand.data.body_pos_w, + self.hand.data.body_quat_w, + self.hand.data.body_vel_w, + self.finger_bodies, + self.env_origins, + self.object.data.root_link_pose_w, + self.object.data.root_com_vel_w, + self.num_fingertips, + self.fingertip_pos, + self.fingertip_rot, + self.fingertip_velocities, + self.object_pose, + self.object_vels, + ], + device=self.device, + ) + + def compute_reduced_observations(self): + # Per https://arxiv.org/pdf/1808.00177.pdf Table 2 + # Fingertip positions + # Object Position, but not orientation + # Relative target orientation + wp.launch( + compute_reduced_observations, + dim=self.num_envs, + inputs=[ + self.fingertip_pos, + self.object_pose, + self.goal_rot, + self.actions, + self.num_fingertips, + self.cfg.action_space, + self.observations, + ], + device=self.device, + ) + # Warp-native non-finite sanitization + print-once. + wp.launch( + sanitize_and_print_once, + dim=(self.num_envs * self.cfg.observation_space), + inputs=[self.observations.flatten(), self.obs_nonfinite_flag], + device=self.device, + ) + self.obs_nonfinite_flag.zero_() + + def compute_full_observations(self): + wp.launch( + compute_full_observations, + dim=self.num_envs, + inputs=[ + self.hand.data.joint_pos, + self.hand.data.joint_vel, + self.hand_dof_lower_limits, + self.hand_dof_upper_limits, + self.cfg.vel_obs_scale, + self.object_pose, + self.object_vels, + self.in_hand_pos, + self.goal_rot, + self.fingertip_pos, + self.fingertip_rot, + self.fingertip_velocities, + self.actions, + self.num_hand_dofs, + self.num_fingertips, + self.cfg.action_space, + self.observations, + ], + device=self.device, + ) + # Warp-native non-finite sanitization + print-once. + wp.launch( + sanitize_and_print_once, + dim=(self.num_envs * self.cfg.observation_space), + inputs=[self.observations.flatten(), self.obs_nonfinite_flag], + device=self.device, + ) + self.obs_nonfinite_flag.zero_() diff --git a/source/isaaclab_tasks_experimental/pyproject.toml b/source/isaaclab_tasks_experimental/pyproject.toml new file mode 100644 index 000000000000..d90ac3536f16 --- /dev/null +++ b/source/isaaclab_tasks_experimental/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel", "toml"] +build-backend = "setuptools.build_meta" diff --git a/source/isaaclab_tasks_experimental/setup.py b/source/isaaclab_tasks_experimental/setup.py new file mode 100644 index 000000000000..ae77016f8984 --- /dev/null +++ b/source/isaaclab_tasks_experimental/setup.py @@ -0,0 +1,40 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Installation script for the 'isaaclab_tasks_experimental' python package.""" + +import os + +import toml +from setuptools import find_packages, setup + +# Obtain the extension data from the extension.toml file +EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__)) +# Read the extension.toml file +EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml")) + +# Installation operation +setup( + name="isaaclab_tasks_experimental", + author="Isaac Lab Project Developers", + maintainer="Isaac Lab Project Developers", + url=EXTENSION_TOML_DATA["package"]["repository"], + version=EXTENSION_TOML_DATA["package"]["version"], + description=EXTENSION_TOML_DATA["package"]["description"], + keywords=EXTENSION_TOML_DATA["package"]["keywords"], + include_package_data=True, + python_requires=">=3.10", + install_requires=["isaaclab_tasks"], + packages=find_packages(), + classifiers=[ + "Natural Language :: English", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Isaac Sim :: 5.0.0", + "Isaac Sim :: 5.1.0", + "Isaac Sim :: 6.0.0", + ], + zip_safe=False, +)