From 0949f42b811140f4fd694ab11e5daed6486b7456 Mon Sep 17 00:00:00 2001
From: Jichuan Hu <leo.hu.sh@gmail.com>
Date: Fri, 27 Feb 2026 01:50:52 -0800
Subject: [PATCH] Adds inhand manipulation warp env and WarpGraphCache utility

- Add WarpGraphCache capture-or-replay utility to isaaclab_experimental/utils
- Refactor direct_rl_env_warp to use WarpGraphCache, split step_warp_end
  into pre/post to run write_data_to_sim uncaptured between graph segments
---
 CONTRIBUTORS.md                               |   1 +
 .../reinforcement_learning/rsl_rl/train.py    |   3 +
 .../config/extension.toml                     |  39 +
 .../isaaclab_experimental/__init__.py         |  20 +
 .../isaaclab_experimental/envs/__init__.py    |  51 +
 .../envs/direct_rl_env_warp.py                | 818 +++++++++++++++
 .../envs/interactive_scene_warp.py            |  44 +
 .../envs/utils/__init__.py                    |   6 +
 .../envs/utils/spaces.py                      | 221 ++++
 .../isaaclab_experimental/utils/timer.py      | 270 +++++
 .../utils/warp_graph_cache.py                 |  43 +
 source/isaaclab_experimental/pyproject.toml   |   3 +
 source/isaaclab_experimental/setup.py         |  53 +
 source/isaaclab_newton/setup.py               |   2 +-
 .../isaaclab_rl/rsl_rl/vecenv_wrapper.py      |  11 +-
 .../config/extension.toml                     |  22 +
 .../docs/README.md                            |   3 +
 .../isaaclab_tasks_experimental/__init__.py   |  32 +
 .../direct/__init__.py                        |  10 +
 .../direct/allegro_hand/__init__.py           |  29 +
 .../allegro_hand/allegro_hand_warp_env_cfg.py | 136 +++
 .../direct/inhand_manipulation/__init__.py    |   4 +
 .../inhand_manipulation_warp_env.py           | 980 ++++++++++++++++++
 .../pyproject.toml                            |   3 +
 source/isaaclab_tasks_experimental/setup.py   |  40 +
 25 files changed, 2842 insertions(+), 2 deletions(-)
 create mode 100644 source/isaaclab_experimental/config/extension.toml
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/__init__.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/utils/spaces.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/utils/timer.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py
 create mode 100644 source/isaaclab_experimental/pyproject.toml
 create mode 100644 source/isaaclab_experimental/setup.py
 create mode 100644 source/isaaclab_tasks_experimental/config/extension.toml
 create mode 100644 source/isaaclab_tasks_experimental/docs/README.md
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py
 create mode 100644 source/isaaclab_tasks_experimental/pyproject.toml
 create mode 100644 source/isaaclab_tasks_experimental/setup.py

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 201c70bf727e..c67997cf40be 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -93,6 +93,7 @@ Guidelines for modifications:
 * Ji Yuan Feng
 * Jia Lin Yuan
 * Jiakai Zhang
+* Jichuan Hu
 * Jinghuan Shang
 * Jingzhou Liu
 * Jinqi Wei
diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py
index 5d7c90b95447..50d40de0d3b6 100644
--- a/scripts/reinforcement_learning/rsl_rl/train.py
+++ b/scripts/reinforcement_learning/rsl_rl/train.py
@@ -6,6 +6,7 @@
 """Script to train RL agent with RSL-RL."""
 
 import argparse
+import contextlib
 import importlib.metadata as metadata
 import logging
 import os
@@ -34,6 +35,8 @@
 logger = logging.getLogger(__name__)
 
 # PLACEHOLDER: Extension template (do not remove this comment)
+with contextlib.suppress(ImportError):
+    import isaaclab_tasks_experimental  # noqa: F401
 
 RSL_RL_VERSION = "3.0.1"
 
diff --git a/source/isaaclab_experimental/config/extension.toml b/source/isaaclab_experimental/config/extension.toml
new file mode 100644
index 000000000000..d6f50f27f5f4
--- /dev/null
+++ b/source/isaaclab_experimental/config/extension.toml
@@ -0,0 +1,39 @@
+[package]
+
+# Note: Semantic Versioning is used: https://semver.org/
+version = "0.0.1"
+
+# Description
+title = "Experimental playground for upcoming IsaacLab features"
+description="Provides early access to future features that are not yet `production-ready`."
+readme  = "docs/README.md"
+repository = "https://github.com/isaac-sim/IsaacLab"
+category = "robotics"
+keywords = ["kit", "robotics", "learning", "ai"]
+
+[python.pipapi]
+requirements = [
+    "numpy",
+    "prettytable==3.3.0",
+    "toml",
+    "hidapi",
+    "gymnasium==0.29.0",
+    "trimesh"
+]
+
+modules = [
+    "numpy",
+    "prettytable",
+    "toml",
+    "hid",
+    "gymnasium",
+    "trimesh"
+]
+
+use_online_index=true
+
+[core]
+reloadable = false
+
+[[python.module]]
+name = "isaaclab_experimental"
diff --git a/source/isaaclab_experimental/isaaclab_experimental/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/__init__.py
new file mode 100644
index 000000000000..18e97e2dd188
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/__init__.py
@@ -0,0 +1,20 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Package containing experimental Isaac Lab features that are not yet production-ready."""
+
+import os
+import toml
+from enum import IntEnum
+
+# Absolute path of the extension root, i.e. the parent of this package directory
+ISAACLAB_EXPERIMENTAL_EXT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
+"""Path to the extension source directory."""
+
+ISAACLAB_EXPERIMENTAL_METADATA = toml.load(os.path.join(ISAACLAB_EXPERIMENTAL_EXT_DIR, "config", "extension.toml"))
+"""Extension metadata dictionary parsed from the extension.toml file."""
+
+# Expose the version declared under the [package] table of extension.toml
+__version__ = ISAACLAB_EXPERIMENTAL_METADATA["package"]["version"]
diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py
new file mode 100644
index 000000000000..81c59dda7d51
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Sub-package for environment definitions.
+
+Environments define the interface between the agent and the simulation.
+In the simplest case, the environment provides the agent with the current
+observations and executes the actions provided by the agent. However, the
+environment can also provide additional information such as the current
+reward, done flag, and information about the current episode.
+
+There are two types of environment designing workflows:
+
+* **Manager-based**: The environment is decomposed into individual components (or managers)
+  for different aspects (such as computing observations, applying actions, and applying
+  randomization). The users mainly configure the managers and the environment coordinates the
+  managers and calls their functions.
+* **Direct**: The user implements all the necessary functionality directly into a single class
+  without the need for additional managers.
+
+Based on these workflows, there are the following environment classes for single and multi-agent RL:
+
+**Single-Agent RL:**
+
+* :class:`ManagerBasedEnv`: The manager-based workflow base environment which only provides the
+  agent with the current observations and executes the actions provided by the agent.
+* :class:`ManagerBasedRLEnv`: The manager-based workflow RL task environment which besides the
+  functionality of the base environment also provides additional Markov Decision Process (MDP)
+  related information such as the current reward, done flag, and information.
+* :class:`DirectRLEnv`: The direct workflow RL task environment which provides implementations for
+  implementing scene setup, computing dones, performing resets, and computing reward and observation.
+
+**Multi-Agent RL (MARL):**
+
+* :class:`DirectMARLEnv`: The direct workflow MARL task environment which provides implementations for
+  implementing scene setup, computing dones, performing resets, and computing reward and observation.
+
+For more information about the workflow design patterns, see the `Task Design Workflows`_ section.
+
+.. _`Task Design Workflows`: https://isaac-sim.github.io/IsaacLab/source/features/task_workflows.html
+"""
+
+from .direct_rl_env_warp import DirectRLEnvWarp  # noqa: F401
+from .interactive_scene_warp import InteractiveSceneWarp  # noqa: F401
+
+__all__ = [
+    "DirectRLEnvWarp",
+    "InteractiveSceneWarp",
+]
diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py b/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py
new file mode 100644
index 000000000000..ebb1acadfcb9
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py
@@ -0,0 +1,818 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+import contextlib
+import inspect
+import logging
+import math
+import os
+import weakref
+from abc import abstractmethod
+from dataclasses import MISSING
+from typing import Any, ClassVar
+
+import gymnasium as gym
+import numpy as np
+import torch
+
+# import omni.kit.app
+# import omni.log
+# import omni.physx
+import warp as wp
+
+from isaaclab.envs.common import VecEnvObs, VecEnvStepReturn
+from isaaclab.envs.direct_rl_env_cfg import DirectRLEnvCfg
+
+# from isaaclab.envs.ui import ViewportCameraController
+from isaaclab.managers import EventManager
+from isaaclab.sim import SimulationContext
+from isaaclab.sim.utils import use_stage
+from isaaclab.utils.noise import NoiseModel
+from isaaclab.utils.seed import configure_seed
+
+from isaaclab_experimental.envs.interactive_scene_warp import InteractiveSceneWarp
+from isaaclab_experimental.utils.timer import Timer
+from isaaclab_experimental.utils.warp_graph_cache import WarpGraphCache
+
+from .utils.spaces import sample_space, spec_to_gym_space
+
+# from isaacsim.core.simulation_manager import SimulationManager
+# from isaacsim.core.version import get_version
+
+
+# import logger
+logger = logging.getLogger(__name__)
+
+DEBUG_TIMER_STEP = os.environ.get("DEBUG_TIMER_STEP", "0") == "1"
+"""Enable outer step() timer only. Set DEBUG_TIMER_STEP=1 env var to enable."""
+
+DEBUG_TIMERS = os.environ.get("DEBUG_TIMERS", "0") == "1"
+"""Enable all fine-grained inner timers (adds wp.synchronize per sub-phase). Set DEBUG_TIMERS=1 env var to enable."""
+
+
+@wp.kernel
+def zero_mask_int32(mask: wp.array(dtype=wp.bool), data: wp.array(dtype=wp.int32)):
+    """Zero every entry of ``data`` whose corresponding ``mask`` flag is set."""
+    # the thread id is used directly as the index into both arrays
+    idx = wp.tid()
+    # entries whose flag is false are left untouched
+    if mask[idx]:
+        data[idx] = 0
+
+
+@wp.kernel
+def add_to_env(data: wp.array(dtype=wp.int32), value: wp.int32):
+    """Add the scalar ``value`` to the ``data`` entry indexed by the thread id."""
+    # the thread id is used directly as the array index
+    idx = wp.tid()
+    # in-place increment of this thread's entry
+    data[idx] += value
+
+
+class DirectRLEnvWarp(gym.Env):
+    """The superclass for the direct workflow to design environments.
+
+    This class implements the core functionality for reinforcement learning (RL)
+    environments. It is designed to be used with any RL library. The class is designed
+    to be used with vectorized environments, i.e., the environment is expected to be run
+    in parallel with multiple sub-environments.
+
+    While the environment itself is implemented as a vectorized environment, we do not
+    inherit from :class:`gym.vector.VectorEnv`. This is mainly because the class adds
+    various methods (for wait and asynchronous updates) which are not required.
+    Additionally, each RL library typically has its own definition for a vectorized
+    environment. Thus, to reduce complexity, we directly use the :class:`gym.Env` over
+    here and leave it up to library-defined wrappers to take care of wrapping this
+    environment for their agents.
+
+    Note:
+        For vectorized environments, it is recommended to **only** call the :meth:`reset`
+        method once before the first call to :meth:`step`, i.e. after the environment is created.
+        After that, the :meth:`step` function handles the reset of terminated sub-environments
+        in a vectorized environment.
+
+    """
+
+    is_vector_env: ClassVar[bool] = True
+    """Whether the environment is a vectorized environment."""
+    metadata: ClassVar[dict[str, Any]] = {
+        "render_modes": [None, "human", "rgb_array"],
+        # "isaac_sim_version": get_version(),
+    }
+    """Metadata for the environment."""
+
+    def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs):
+        """Initialize the environment.
+
+        Args:
+            cfg: The configuration object for the environment.
+            render_mode: The render mode for the environment. Defaults to None, which
+                is similar to ``"human"``.
+
+        Raises:
+            RuntimeError: If a simulation context already exists. The environment must always create one
+                since it configures the simulation context and controls the simulation.
+        """
+        # check that the config is valid
+        cfg.validate()
+        # store inputs to class
+        self.cfg = cfg
+        # store the render mode
+        self.render_mode = render_mode
+        # initialize internal variables
+        self._is_closed = False
+
+        # set the seed for the environment
+        if self.cfg.seed is not None:
+            self.cfg.seed = self.seed(self.cfg.seed)
+        else:
+            logger.warning("Seed not set for the environment. The environment creation may not be deterministic.")
+
+        # create a simulation context to control the simulator
+        if SimulationContext.instance() is None:
+            self.sim: SimulationContext = SimulationContext(self.cfg.sim)
+        else:
+            raise RuntimeError("Simulation context already exists. Cannot create a new one.")
+
+        # make sure torch is running on the correct device
+        if "cuda" in self.device:
+            torch.cuda.set_device(self.device)
+
+        # print useful information
+        print("[INFO]: Base environment:")
+        print(f"\tEnvironment device    : {self.device}")
+        print(f"\tEnvironment seed      : {self.cfg.seed}")
+        print(f"\tPhysics step-size     : {self.physics_dt}")
+        print(f"\tRendering step-size   : {self.physics_dt * self.cfg.sim.render_interval}")
+        print(f"\tEnvironment step-size : {self.step_dt}")
+
+        if self.cfg.sim.render_interval < self.cfg.decimation:
+            msg = (
+                f"The render interval ({self.cfg.sim.render_interval}) is smaller than the decimation "
+                f"({self.cfg.decimation}). Multiple render calls will happen for each environment step. "
+                "If this is not intended, set the render interval to be equal to the decimation."
+            )
+            logger.warning(msg)
+
+        # generate scene
+        with Timer("[INFO]: Time taken for scene creation", "scene_creation"):
+            # set the stage context for scene creation steps which use the stage
+            with use_stage(self.sim.stage):
+                self.scene = InteractiveSceneWarp(self.cfg.scene)
+                self._setup_scene()
+                # attach_stage_to_usd_context()
+        print("[INFO]: Scene manager: ", self.scene)
+
+        # set up camera viewport controller
+        # viewport is not available in other rendering modes so the function will throw a warning
+        # FIXME: This needs to be fixed in the future when we unify the UI functionalities even for
+        # non-rendering modes.
+        has_gui = bool(self.sim.get_setting("/isaaclab/has_gui"))
+        offscreen_render = bool(self.sim.get_setting("/isaaclab/render/offscreen"))
+        if has_gui or offscreen_render:
+            # self.viewport_camera_controller = ViewportCameraController(self, self.cfg.viewer)
+            self.viewport_camera_controller = None
+        else:
+            self.viewport_camera_controller = None
+
+        # create event manager
+        # note: this is needed here (rather than after simulation play) to allow USD-related randomization events
+        #   that must happen before the simulation starts. Example: randomizing mesh scale
+        if self.cfg.events:
+            self.event_manager = EventManager(self.cfg.events, self)
+
+            # apply USD-related randomization events
+            if "prestartup" in self.event_manager.available_modes:
+                self.event_manager.apply(mode="prestartup")
+
+        # play the simulator to activate physics handles
+        # note: this activates the physics simulation view that exposes TensorAPIs
+        # note: when started in extension mode, first call sim.reset_async() and then initialize the managers
+        # if builtins.ISAAC_LAUNCHED_FROM_TERMINAL is False:
+        #     print("[INFO]: Starting the simulation. This may take a few seconds. Please wait...")
+        with Timer("[INFO]: Time taken for simulation start", "simulation_start"):
+            # since the reset can trigger callbacks which use the stage,
+            # we need to set the stage context here
+            with use_stage(self.sim.stage):
+                self.sim.reset()
+            # update scene to pre populate data buffers for assets and sensors.
+            # this is needed for the observation manager to get valid tensors for initialization.
+            # this shouldn't cause an issue since later on, users do a reset over all the
+            # environments so the lazy buffers would be reset.
+            self.scene.update(dt=self.physics_dt)
+
+        # check if debug visualization has been implemented by the environment
+        source_code = inspect.getsource(self._set_debug_vis_impl)
+        self.has_debug_vis_implementation = "NotImplementedError" not in source_code
+        self._debug_vis_handle = None
+
+        # extend UI elements
+        # we need to do this here after all the managers are initialized
+        # this is because they dictate the sensors and commands right now
+        if bool(self.sim.settings.get("/isaaclab/visualizer")) and self.cfg.ui_window_class_type is not None:
+            self._window = self.cfg.ui_window_class_type(self, window_name="IsaacLab")
+        else:
+            # if no window, then we don't need to store the window
+            self._window = None
+
+        # allocate dictionary to store metrics
+        self.extras = {}
+
+        # initialize data and constants
+        # -- counter for simulation steps
+        self._sim_step_counter = 0
+        # -- counter for curriculum
+        self.common_step_counter = 0
+        # -- init buffers
+        self._episode_length_buf_wp = wp.zeros(self.num_envs, dtype=wp.int32, device=self.device)
+        self.episode_length_buf = wp.to_torch(self._episode_length_buf_wp)
+        self.reset_terminated = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device)
+        self.reset_time_outs = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device)
+        self.reset_buf = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device)
+        self._ALL_ENV_MASK = wp.ones(self.num_envs, dtype=wp.bool, device=self.device)
+
+        # Expected bindings:
+        self.torch_obs_buf: torch.Tensor = None
+        self.torch_reward_buf: torch.Tensor = None
+        self.torch_reset_terminated: torch.Tensor = None
+        self.torch_reset_time_outs: torch.Tensor = None
+        self.torch_episode_length_buf: torch.Tensor = None
+
+        # Warp CUDA graph cache for capture-or-replay
+        self._graph_cache = WarpGraphCache()
+
+        # setup the action and observation spaces for Gym
+        self._configure_gym_env_spaces()
+
+        # setup noise cfg for adding action and observation noise
+        if self.cfg.action_noise_model:
+            self._action_noise_model: NoiseModel = self.cfg.action_noise_model.class_type(
+                self.cfg.action_noise_model, num_envs=self.num_envs, device=self.device
+            )
+        if self.cfg.observation_noise_model:
+            self._observation_noise_model: NoiseModel = self.cfg.observation_noise_model.class_type(
+                self.cfg.observation_noise_model, num_envs=self.num_envs, device=self.device
+            )
+
+        # perform events at the start of the simulation
+        if self.cfg.events:
+            # we print it here to make the logging consistent
+            print("[INFO] Event Manager: ", self.event_manager)
+
+            if "startup" in self.event_manager.available_modes:
+                self.event_manager.apply(mode="startup")
+
+        # set the framerate of the gym video recorder wrapper so that the playback speed of the produced
+        # video matches the simulation
+        self.metadata["render_fps"] = 1 / self.step_dt
+
+        # print the environment information
+        print("[INFO]: Completed setting up the environment...")
+
+    def __del__(self):
+        """Best-effort cleanup when the environment object is destroyed."""
+        # ``contextlib`` may already be ``None`` during interpreter shutdown: skip the cleanup then
+        if contextlib is None:
+            return
+        with contextlib.suppress(ImportError, AttributeError, TypeError):
+            self.close()
+
+    """
+    Properties.
+    """
+
+    @property
+    def num_envs(self) -> int:
+        """The number of environment instances that are running, as reported by the scene."""
+        return self.scene.num_envs
+
+    @property
+    def physics_dt(self) -> float:
+        """The physics time-step (in s), read from ``cfg.sim.dt``.
+
+        This is the smallest time-step at which the simulation is happening.
+        """
+        return self.cfg.sim.dt
+
+    @property
+    def step_dt(self) -> float:
+        """The environment stepping time-step (in s).
+
+        Computed as the physics time-step ``cfg.sim.dt`` multiplied by ``cfg.decimation``.
+        """
+        return self.cfg.sim.dt * self.cfg.decimation
+
+    @property
+    def device(self):
+        """The device on which the environment is running, as reported by the simulation context."""
+        return self.sim.device
+
+    @property
+    def max_episode_length_s(self) -> float:
+        """Maximum episode length in seconds, read from ``cfg.episode_length_s``."""
+        return self.cfg.episode_length_s
+
+    @property
+    def max_episode_length(self):
+        """The maximum episode length in environment steps (length in s / step size, rounded up)."""
+        return math.ceil(self.max_episode_length_s / (self.cfg.sim.dt * self.cfg.decimation))
+
+    """
+    Operations.
+    """
+
+    def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[VecEnvObs, dict]:
+        """Reset every environment and return the observations.
+
+        All environments are reset through :meth:`_reset_idx` using the all-environments mask.
+        However, certain operations, such as procedural terrain generation, that happened during
+        initialization are not repeated.
+
+        Args:
+            seed: The seed to use for randomization. Defaults to None, in which case the seed is not set.
+            options: Additional information to specify how the environment is reset. Defaults to None.
+                Accepted for compatibility with the Gymnasium environment definition; it is not read here.
+
+        Returns:
+            A tuple containing the observations and extras.
+        """
+        # re-seed only when a seed is given
+        if seed is not None:
+            self.seed(seed)
+
+        # reset state of scene for all environments
+        self._reset_idx(self._ALL_ENV_MASK)
+
+        # update articulation kinematics
+        self.scene.write_data_to_sim()
+
+        # if sensors are added to the scene, make sure we render to reflect changes in reset
+        has_rtx_sensors = hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors()
+        if has_rtx_sensors and self.cfg.rerender_on_reset:
+            self.sim.render()
+
+        # if self.cfg.wait_for_textures and self.sim.has_rtx_sensors():
+        #     while SimulationManager.assets_loading():
+        #         self.sim.render()
+
+        # refresh the observation buffer and return a copy of it
+        self._get_observations()
+        policy_obs = self.torch_obs_buf.clone()
+        return {"policy": policy_obs}, self.extras
+
+    @Timer(name="env_step", msg="Step took:", enable=DEBUG_TIMER_STEP or DEBUG_TIMERS)
+    def step(self, action: torch.Tensor) -> VecEnvStepReturn:
+        """Execute one time-step of the environment's dynamics.
+
+        The environment steps forward at a fixed time-step, while the physics simulation is decimated at a
+        lower time-step. This is to ensure that the simulation is stable. These two time-steps can be configured
+        independently using the :attr:`DirectRLEnvCfg.decimation` (number of simulation steps per environment step)
+        and the :attr:`DirectRLEnvCfg.sim.dt` (physics time-step). Based on these parameters, the environment
+        time-step is computed as the product of the two.
+
+        This function performs the following steps:
+
+        1. Pre-process the actions before stepping through the physics.
+        2. Apply the actions to the simulator and step through the physics in a decimated manner.
+        3. Compute the reward and done signals.
+        4. Reset environments that have terminated or reached the maximum episode length.
+        5. Apply interval events if they are enabled (this part is currently commented out).
+        6. Compute observations.
+
+        Args:
+            action: The actions to apply on the environment. Shape is (num_envs, action_dim).
+
+        Returns:
+            A tuple containing the observations, rewards, resets (terminated and truncated) and extras.
+        """
+
+        action = action.to(self.device)
+        # add action noise
+        if self.cfg.action_noise_model:
+            action = self._action_noise_model(action)
+
+        # process actions, #TODO pass the torch tensor directly.
+        with Timer(name="pre_physics", msg="Pre-physics step took:", enable=DEBUG_TIMERS):
+            self._pre_physics_step(
+                wp.from_torch(action)
+            )  # Creates a new warp array on every call. Not graphable unless training loop reuses the same pointer.
+
+        # check if we need to do rendering within the physics loop
+        # note: checked here once to avoid multiple checks within the loop
+        _has_rtx = hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors()
+        is_rendering = bool(self.sim.settings.get("/isaaclab/visualizer")) or _has_rtx
+
+        # perform physics stepping
+        with Timer(name="physics_loop", msg="Physics loop took:", enable=DEBUG_TIMERS):
+            for _ in range(self.cfg.decimation):
+                self._sim_step_counter += 1
+                # set actions into buffers
+                # note: the call goes through the graph cache (``capture_or_replay``)
+                with Timer(name="apply_action", msg="Action processing step took:", enable=DEBUG_TIMERS):
+                    self._graph_cache.capture_or_replay("action", self.step_warp_action)
+
+                # write_data_to_sim runs outside the CUDA graph because _apply_actuator_model
+                # uses torch ops (wp.to_torch + torch arithmetic) that cross CUDA streams.
+                with Timer(name="write_data_to_sim_loop", msg="Write data to sim (loop) took:", enable=DEBUG_TIMERS):
+                    self.scene.write_data_to_sim()
+
+                with Timer(name="simulate", msg="Newton simulation step took:", enable=DEBUG_TIMERS):
+                    self.sim.step(render=False)
+                # render between steps only if the GUI or an RTX sensor needs it
+                # note: we assume the render interval to be the shortest accepted rendering interval.
+                #    If a camera needs rendering at a faster frequency, this will lead to unexpected behavior.
+                if self._sim_step_counter % self.cfg.sim.render_interval == 0 and is_rendering:
+                    self.sim.render()
+                # update buffers at sim dt
+                with Timer(name="scene_update", msg="Scene update took:", enable=DEBUG_TIMERS):
+                    self.scene.update(dt=self.physics_dt)
+
+        self.common_step_counter += 1  # total step (common for all envs)
+        with Timer(name="end_pre_graph", msg="End pre-graph took:", enable=DEBUG_TIMERS):
+            self._graph_cache.capture_or_replay("end_pre", self._step_warp_end_pre)
+        # write_data_to_sim runs uncaptured — it uses torch ops that cross CUDA streams.
+        with Timer(name="write_data_to_sim_post", msg="Write data to sim (post-reset) took:", enable=DEBUG_TIMERS):
+            self.scene.write_data_to_sim()
+        with Timer(name="end_post_graph", msg="End post-graph took:", enable=DEBUG_TIMERS):
+            self._graph_cache.capture_or_replay("end_post", self._step_warp_end_post)
+
+        # Visualization hook — runs after CUDA graph scope. Override in subclass
+        # to update markers or other non-graphable visual elements.
+        with Timer(name="visualize", msg="Visualize took:", enable=DEBUG_TIMERS):
+            self._post_step_visualize()
+
+        # return observations, rewards, resets and extras
+        return (
+            {"policy": self.torch_obs_buf.clone()},
+            self.torch_reward_buf,
+            self.torch_reset_terminated,
+            self.torch_reset_time_outs,
+            self.extras,
+        )
+
+    def _post_step_visualize(self) -> None:
+        """Visualization hook called by :meth:`step` after the graph-captured segments.
+
+        The base implementation does nothing. Override in subclass to update markers
+        or other non-graphable visual elements (e.g., those requiring
+        wp.to_torch + .cpu().numpy()). This runs every step, outside any CUDA graph
+        capture.
+        """
+
+    def step_warp_action(self) -> None:
+        """Apply the actions; run by :meth:`step` through the ``"action"`` graph-cache entry."""
+        self._apply_action()
+        # Note: scene.write_data_to_sim() is called separately outside the CUDA graph capture scope because
+        # it invokes _apply_actuator_model() which uses torch arithmetic (wp.to_torch + torch ops). This would
+        # cause a CUDA stream crossing error during graph capture. It is safe outside since it runs every step.
+
+    def _step_warp_end_pre(self) -> None:
+        """Capturable portion before write_data_to_sim (pure warp kernels).
+
+        Increments the episode-length counters, evaluates dones and rewards, and resets
+        the environments flagged in ``reset_buf``.
+        """
+        # add one step to every environment's episode length; launch explicitly on the device
+        # that owns the buffer instead of relying on Warp's current default device
+        wp.launch(add_to_env, dim=self.num_envs, inputs=[self._episode_length_buf_wp, 1], device=self.device)
+        # evaluate the done flags first, then the rewards
+        self._get_dones()
+        self._get_rewards()
+
+        # -- reset envs that terminated/timed-out and log the episode information
+        self._reset_idx(mask=self.reset_buf)
+
+    def _step_warp_end_post(self) -> None:
+        """Capturable portion after write_data_to_sim (pure warp kernels)."""
+        # if sensors are added to the scene, make sure we render to reflect changes in reset
+        # if self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset:
+        #    self.sim.render()
+
+        # TODO We could split it out.
+        # post-step: step interval event
+        # if self.cfg.events:
+        #    if "interval" in self.event_manager.available_modes:
+        #        self.event_manager.apply(mode="interval", dt=self.step_dt)
+
+        # update observations
+        self._get_observations()
+
+        # add observation noise
+        # note: we apply no noise to the state space (since it is used for critic networks)
+        # if self.cfg.observation_noise_model:
+        #    self.obs_buf["policy"] = self._observation_noise_model(self.obs_buf["policy"])
+
+    @staticmethod
+    def seed(seed: int = -1) -> int:
+        """Set the seed for the environment.
+
+        Args:
+            seed: The seed for random generator. Defaults to -1.
+
+        Returns:
+            The seed used for random generator.
+        """
+        # set seed for replicator
+        try:
+            import omni.replicator.core as rep
+
+            rep.set_global_seed(seed)
+        except ModuleNotFoundError:
+            pass
+        # set seed for torch and other libraries
+        return configure_seed(seed)
+
+    def render(self, recompute: bool = False) -> np.ndarray | None:
+        """Run rendering without stepping through the physics.
+
+        By convention, if mode is:
+
+        - **human**: Render to the current display and return nothing. Usually for human consumption.
+        - **rgb_array**: Return a numpy.ndarray with shape (x, y, 3), representing RGB values for an
+          x-by-y pixel image, suitable for turning into a video.
+
+        Args:
+            recompute: Whether to force a render even if the simulator has already rendered the scene.
+                Defaults to False.
+
+        Returns:
+            The rendered image as a numpy array if mode is "rgb_array". Otherwise, returns None.
+
+        Raises:
+            RuntimeError: If mode is set to "rgb_array" and simulation render mode does not support it.
+                In this case, the simulation render mode must be set to ``RenderMode.PARTIAL_RENDERING``
+                or ``RenderMode.FULL_RENDERING``.
+            NotImplementedError: If an unsupported rendering mode is specified.
+        """
+        # run a rendering step of the simulator
+        # if we have rtx sensors, we do not need to render the sim again
+        if not (hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors()) and not recompute:
+            self.sim.render()
+        # decide the rendering mode
+        if self.render_mode == "human" or self.render_mode is None:
+            return None
+        elif self.render_mode == "rgb_array":
+            # check whether any render could have happened
+            has_gui = bool(self.sim.get_setting("/isaaclab/has_gui"))
+            offscreen_render = bool(self.sim.get_setting("/isaaclab/render/offscreen"))
+            # Rendering is possible if we have GUI or offscreen rendering enabled
+            can_render = has_gui or offscreen_render
+
+            if not can_render:
+                render_mode_name = "NO_GUI_OR_RENDERING"
+                raise RuntimeError(
+                    f"Cannot render '{self.render_mode}' when the simulation render mode is"
+                    f" '{render_mode_name}'. Please set the simulation render mode"
+                    " to:'PARTIAL_RENDERING' or"
+                    " 'FULL_RENDERING'. If running headless, make"
+                    " sure --enable_cameras is set."
+                )
+            # create the annotator if it does not exist
+            if not hasattr(self, "_rgb_annotator"):
+                import omni.replicator.core as rep
+
+                # create render product
+                self._render_product = rep.create.render_product(
+                    self.cfg.viewer.cam_prim_path, self.cfg.viewer.resolution
+                )
+                # create rgb annotator -- used to read data from the render product
+                self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu")
+                self._rgb_annotator.attach([self._render_product])
+            # obtain the rgb data
+            rgb_data = self._rgb_annotator.get_data()
+            # convert to numpy array
+            rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape)
+            # return the rgb data
+            # note: initially the renderer is warming up and returns empty data
+            if rgb_data.size == 0:
+                return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8)
+            else:
+                return rgb_data[:, :, :3]
+        else:
+            raise NotImplementedError(
+                f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}."
+            )
+
+    def close(self):
+        """Cleanup for the environment."""
+        if not self._is_closed:
+            # close entities related to the environment
+            # note: this is order-sensitive to avoid any dangling references
+            if self.cfg.events:
+                del self.event_manager
+            del self.scene
+            if self.viewport_camera_controller is not None:
+                del self.viewport_camera_controller
+
+            # # clear callbacks and instance
+            # if float(".".join(get_version()[2])) >= 5:
+            #     if self.cfg.sim.create_stage_in_memory:
+            #         # detach physx stage
+            #         omni.physx.get_physx_simulation_interface().detach_stage()
+            #         self.sim.stop()
+            #         self.sim.clear()
+
+            # self.sim.clear_all_callbacks()
+            self.sim.clear_instance()
+
+            # destroy the window
+            if self._window is not None:
+                self._window = None
+            # update closing status
+            self._is_closed = True
+
+    """
+    Operations - Debug Visualization.
+    """
+
+    def set_debug_vis(self, debug_vis: bool) -> bool:
+        """Toggles the environment debug visualization.
+
+        Args:
+            debug_vis: Whether to visualize the environment debug visualization.
+
+        Returns:
+            Whether the debug visualization was successfully set. False if the environment
+            does not support debug visualization.
+        """
+        # check if debug visualization is supported
+        if not self.has_debug_vis_implementation:
+            return False
+        # toggle debug visualization objects
+        self._set_debug_vis_impl(debug_vis)
+        # toggle debug visualization handles
+        if debug_vis:
+            import omni.kit.app
+
+            # create a subscriber for the post update event if it doesn't exist
+            if self._debug_vis_handle is None:
+                app_interface = omni.kit.app.get_app_interface()
+                self._debug_vis_handle = app_interface.get_post_update_event_stream().create_subscription_to_pop(
+                    lambda event, obj=weakref.proxy(self): obj._debug_vis_callback(event)
+                )
+        else:
+            # remove the subscriber if it exists
+            if self._debug_vis_handle is not None:
+                self._debug_vis_handle.unsubscribe()
+                self._debug_vis_handle = None
+        # return success
+        return True
+
+    """
+    Helper functions.
+    """
+
+    def _configure_gym_env_spaces(self):
+        """Configure the action and observation spaces for the Gym environment."""
+        # show deprecation message and overwrite configuration
+        if self.cfg.num_actions is not None:
+            logger.warning("DirectRLEnvCfg.num_actions is deprecated. Use DirectRLEnvCfg.action_space instead.")
+            if isinstance(self.cfg.action_space, type(MISSING)):
+                self.cfg.action_space = self.cfg.num_actions
+        if self.cfg.num_observations is not None:
+            logger.warning(
+                "DirectRLEnvCfg.num_observations is deprecated. Use DirectRLEnvCfg.observation_space instead."
+            )
+            if isinstance(self.cfg.observation_space, type(MISSING)):
+                self.cfg.observation_space = self.cfg.num_observations
+        if self.cfg.num_states is not None:
+            logger.warning("DirectRLEnvCfg.num_states is deprecated. Use DirectRLEnvCfg.state_space instead.")
+            if isinstance(self.cfg.state_space, type(MISSING)):
+                self.cfg.state_space = self.cfg.num_states
+
+        # set up spaces
+        self.single_observation_space = gym.spaces.Dict()
+        self.single_observation_space["policy"] = spec_to_gym_space(self.cfg.observation_space)
+        self.single_action_space = spec_to_gym_space(self.cfg.action_space)
+
+        # batch the spaces for vectorized environments
+        self.observation_space = gym.vector.utils.batch_space(self.single_observation_space["policy"], self.num_envs)
+        self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs)
+
+        # optional state space for asymmetric actor-critic architectures
+        self.state_space = None
+        if self.cfg.state_space:
+            self.single_observation_space["critic"] = spec_to_gym_space(self.cfg.state_space)
+            self.state_space = gym.vector.utils.batch_space(self.single_observation_space["critic"], self.num_envs)
+
+        # instantiate actions (needed for tasks for which the observations computation is dependent on the actions)
+        self.actions = sample_space(self.single_action_space, self.sim.device, batch_size=self.num_envs, fill_value=0)
+
+    def _reset_idx(self, mask: wp.array | None = None):
+        """Reset environments based on a boolean mask.
+
+        Args:
+            mask: Boolean mask indicating which environments to reset.
+                Shape is (num_envs,). If None, all environments are reset.
+        """
+        if mask is None:
+            mask = self._ALL_ENV_MASK
+        self.scene.reset(env_ids=None, env_mask=mask)
+
+        # apply events such as randomization for environments that need a reset
+        # if self.cfg.events:
+        #    if "reset" in self.event_manager.available_modes:
+        #        env_step_count = self._sim_step_counter // self.cfg.decimation
+        #        self.event_manager.apply(mode="reset", env_ids=env_ids, global_env_step_count=env_step_count)
+
+        # reset noise models
+        # if self.cfg.action_noise_model:
+        #    self._action_noise_model.reset(env_ids)
+        # if self.cfg.observation_noise_model:
+        #    self._observation_noise_model.reset(env_ids)
+
+        # reset the episode length buffer
+        wp.launch(
+            zero_mask_int32,
+            dim=self.num_envs,
+            inputs=[
+                mask,
+                self._episode_length_buf_wp,
+            ],
+        )
+
+    """
+    Implementation-specific functions.
+    """
+
+    def _setup_scene(self):
+        """Setup the scene for the environment.
+
+        This function is responsible for creating the scene objects and setting up the scene for the environment.
+        The scene creation can happen through :class:`isaaclab.scene.InteractiveSceneCfg` or through
+        directly creating the scene objects and registering them with the scene manager.
+
+        We leave the implementation of this function to the derived classes. If the environment does not require
+        any explicit scene setup, the function can be left empty.
+        """
+        pass
+
+    @abstractmethod
+    def _pre_physics_step(self, actions: wp.array) -> None:
+        """Pre-process actions before stepping through the physics.
+
+        This function is responsible for pre-processing the actions before stepping through the physics.
+        It is called before the physics stepping (which is decimated).
+
+        Args:
+            actions: The actions to apply on the environment. Shape is (num_envs, action_dim).
+        """
+        raise NotImplementedError(f"Please implement the '_pre_physics_step' method for {self.__class__.__name__}.")
+
+    @abstractmethod
+    def _apply_action(self) -> None:
+        """Apply actions to the simulator.
+
+        This function is responsible for applying the actions to the simulator. It is called at each
+        physics time-step. Must be pure warp (no torch ops) to be CUDA graph capturable.
+        """
+        raise NotImplementedError(f"Please implement the '_apply_action' method for {self.__class__.__name__}.")
+
+    @abstractmethod
+    def _get_observations(self) -> None:
+        """Compute the observations for the environment.
+
+        Writes results into the observation buffers (e.g., ``self.obs_buf``).
+        """
+        raise NotImplementedError(f"Please implement the '_get_observations' method for {self.__class__.__name__}.")
+
+    def _get_states(self) -> VecEnvObs | None:
+        """Compute and return the states for the environment.
+
+        The state-space is used for asymmetric actor-critic architectures. It is configured
+        using the :attr:`DirectRLEnvCfg.state_space` parameter.
+
+        Returns:
+            The states for the environment. If the environment does not have a state-space, the function
+            returns a None.
+        """
+        return None  # noqa: R501
+
+    @abstractmethod
+    def _get_rewards(self) -> None:
+        """Compute the rewards for the environment.
+
+        Writes results into the reward buffer (e.g., ``self.reward_buf``).
+        """
+        raise NotImplementedError(f"Please implement the '_get_rewards' method for {self.__class__.__name__}.")
+
+    @abstractmethod
+    def _get_dones(self) -> None:
+        """Compute the done flags for the environment.
+
+        Writes results into the done buffers (e.g., ``self.reset_terminated``, ``self.reset_time_outs``).
+        """
+        raise NotImplementedError(f"Please implement the '_get_dones' method for {self.__class__.__name__}.")
+
+    def _set_debug_vis_impl(self, debug_vis: bool):
+        """Set debug visualization into visualization objects.
+
+        This function is responsible for creating the visualization objects if they don't exist
+        and input ``debug_vis`` is True. If the visualization objects exist, the function should
+        set their visibility into the stage.
+        """
+        raise NotImplementedError(f"Debug visualization is not implemented for {self.__class__.__name__}.")
diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py b/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py
new file mode 100644
index 000000000000..f792b994f67b
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Warp-native interactive scene with env_mask support for reset."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import warp as wp
+
+from isaaclab.scene import InteractiveScene
+
+
+class InteractiveSceneWarp(InteractiveScene):
+    """Interactive scene with warp-native env_mask support for reset.
+
+    Extends :class:`InteractiveScene` to accept a boolean warp mask for selective resets,
+    avoiding the need to convert between env_ids and masks.
+    """
+
+    def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None = None):
+        """Reset scene entities using either env_ids or a boolean env_mask.
+
+        Args:
+            env_ids: The indices of the environments to reset. Defaults to None (all instances).
+            env_mask: Boolean warp mask of shape (num_envs,). Defaults to None.
+        """
+        # -- assets (env_mask is forwarded to articulations, rigid objects and rigid object collections only)
+        for articulation in self._articulations.values():
+            articulation.reset(env_ids, env_mask=env_mask)
+        for deformable_object in self._deformable_objects.values():
+            deformable_object.reset(env_ids)
+        for rigid_object in self._rigid_objects.values():
+            rigid_object.reset(env_ids, env_mask=env_mask)
+        for surface_gripper in self._surface_grippers.values():
+            surface_gripper.reset(env_ids)
+        for rigid_object_collection in self._rigid_object_collections.values():
+            rigid_object_collection.reset(env_ids, env_mask=env_mask)
+        # -- sensors (env_mask is not forwarded; they are reset by env_ids alone)
+        for sensor in self._sensors.values():
+            sensor.reset(env_ids)
diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py
new file mode 100644
index 000000000000..d28381b15b76
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Sub-package for environment utils."""
diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/utils/spaces.py b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/spaces.py
new file mode 100644
index 000000000000..171e47e9debe
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/spaces.py
@@ -0,0 +1,221 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import gymnasium as gym
+import json
+import numpy as np
+import torch
+from typing import Any
+
+from isaaclab.envs.common import SpaceType
+
+
+def spec_to_gym_space(spec: SpaceType) -> gym.spaces.Space:
+    """Generate an appropriate Gymnasium space according to the given space specification.
+
+    Args:
+        spec: Space specification.
+
+    Returns:
+        Gymnasium space.
+
+    Raises:
+        ValueError: If the given space specification is not valid/supported.
+    """
+    if isinstance(spec, gym.spaces.Space):
+        return spec
+    # fundamental spaces
+    # Box
+    elif isinstance(spec, int):
+        return gym.spaces.Box(low=-np.inf, high=np.inf, shape=(spec,))
+    elif isinstance(spec, list) and all(isinstance(x, int) for x in spec):
+        return gym.spaces.Box(low=-np.inf, high=np.inf, shape=spec)
+    # Discrete
+    elif isinstance(spec, set) and len(spec) == 1:
+        return gym.spaces.Discrete(n=next(iter(spec)))
+    # MultiDiscrete
+    elif isinstance(spec, list) and all(isinstance(x, set) and len(x) == 1 for x in spec):
+        return gym.spaces.MultiDiscrete(nvec=[next(iter(x)) for x in spec])
+    # composite spaces
+    # Tuple
+    elif isinstance(spec, tuple):
+        return gym.spaces.Tuple([spec_to_gym_space(x) for x in spec])
+    # Dict
+    elif isinstance(spec, dict):
+        return gym.spaces.Dict({k: spec_to_gym_space(v) for k, v in spec.items()})
+    raise ValueError(f"Unsupported space specification: {spec}")
+
+
+def sample_space(space: gym.spaces.Space, device: str, batch_size: int = -1, fill_value: float | None = None) -> Any:
+    """Sample a Gymnasium space where the data container are PyTorch tensors.
+
+    Args:
+        space: Gymnasium space.
+        device: The device where the tensor should be created.
+        batch_size: Batch size. If the specified value is greater than zero, a batched space will be created and sampled from it.
+        fill_value: The value to fill the created tensors with. If None (default value), tensors will keep their random values.
+
+    Returns:
+        Tensorized sampled space.
+    """
+
+    def tensorize(s, x):
+        if isinstance(s, gym.spaces.Box):
+            tensor = torch.tensor(x, device=device, dtype=torch.float32).reshape(batch_size, *s.shape)
+            if fill_value is not None:
+                tensor.fill_(fill_value)
+            return tensor
+        elif isinstance(s, gym.spaces.Discrete):
+            if isinstance(x, np.ndarray):
+                tensor = torch.tensor(x, device=device, dtype=torch.int64).reshape(batch_size, 1)
+                if fill_value is not None:
+                    tensor.fill_(int(fill_value))
+                return tensor
+            elif isinstance(x, np.number) or type(x) in [int, float]:
+                tensor = torch.tensor([x], device=device, dtype=torch.int64).reshape(batch_size, 1)
+                if fill_value is not None:
+                    tensor.fill_(int(fill_value))
+                return tensor
+        elif isinstance(s, gym.spaces.MultiDiscrete):
+            if isinstance(x, np.ndarray):
+                tensor = torch.tensor(x, device=device, dtype=torch.int64).reshape(batch_size, *s.shape)
+                if fill_value is not None:
+                    tensor.fill_(int(fill_value))
+                return tensor
+        elif isinstance(s, gym.spaces.Dict):
+            return {k: tensorize(_s, x[k]) for k, _s in s.items()}
+        elif isinstance(s, gym.spaces.Tuple):
+            return tuple([tensorize(_s, v) for _s, v in zip(s, x)])
+
+    sample = (gym.vector.utils.batch_space(space, batch_size) if batch_size > 0 else space).sample()
+    return tensorize(space, sample)
+
+
+def serialize_space(space: SpaceType) -> str:
+    """Serialize a space specification as JSON.
+
+    Args:
+        space: Space specification.
+
+    Returns:
+        Serialized JSON representation.
+    """
+    # Gymnasium spaces
+    if isinstance(space, gym.spaces.Discrete):
+        return json.dumps({"type": "gymnasium", "space": "Discrete", "n": int(space.n)})
+    elif isinstance(space, gym.spaces.Box):
+        return json.dumps({
+            "type": "gymnasium",
+            "space": "Box",
+            "low": space.low.tolist(),
+            "high": space.high.tolist(),
+            "shape": space.shape,
+        })
+    elif isinstance(space, gym.spaces.MultiDiscrete):
+        return json.dumps({"type": "gymnasium", "space": "MultiDiscrete", "nvec": space.nvec.tolist()})
+    elif isinstance(space, gym.spaces.Tuple):
+        return json.dumps({"type": "gymnasium", "space": "Tuple", "spaces": tuple(map(serialize_space, space.spaces))})
+    elif isinstance(space, gym.spaces.Dict):
+        return json.dumps(
+            {"type": "gymnasium", "space": "Dict", "spaces": {k: serialize_space(v) for k, v in space.spaces.items()}}
+        )
+    # Python data types
+    # Box
+    elif isinstance(space, int) or (isinstance(space, list) and all(isinstance(x, int) for x in space)):
+        return json.dumps({"type": "python", "space": "Box", "value": space})
+    # Discrete
+    elif isinstance(space, set) and len(space) == 1:
+        return json.dumps({"type": "python", "space": "Discrete", "value": next(iter(space))})
+    # MultiDiscrete
+    elif isinstance(space, list) and all(isinstance(x, set) and len(x) == 1 for x in space):
+        return json.dumps({"type": "python", "space": "MultiDiscrete", "value": [next(iter(x)) for x in space]})
+    # composite spaces
+    # Tuple
+    elif isinstance(space, tuple):
+        return json.dumps({"type": "python", "space": "Tuple", "value": [serialize_space(x) for x in space]})
+    # Dict
+    elif isinstance(space, dict):
+        return json.dumps(
+            {"type": "python", "space": "Dict", "value": {k: serialize_space(v) for k, v in space.items()}}
+        )
+    raise ValueError(f"Unsupported space ({space})")
+
+
+def deserialize_space(string: str) -> gym.spaces.Space:
+    """Deserialize a space specification encoded as JSON.
+
+    Args:
+        string: Serialized JSON representation.
+
+    Returns:
+        Space specification.
+    """
+    obj = json.loads(string)
+    # Gymnasium spaces
+    if obj["type"] == "gymnasium":
+        if obj["space"] == "Discrete":
+            return gym.spaces.Discrete(n=obj["n"])
+        elif obj["space"] == "Box":
+            return gym.spaces.Box(low=np.array(obj["low"]), high=np.array(obj["high"]), shape=obj["shape"])
+        elif obj["space"] == "MultiDiscrete":
+            return gym.spaces.MultiDiscrete(nvec=np.array(obj["nvec"]))
+        elif obj["space"] == "Tuple":
+            return gym.spaces.Tuple(spaces=tuple(map(deserialize_space, obj["spaces"])))
+        elif obj["space"] == "Dict":
+            return gym.spaces.Dict(spaces={k: deserialize_space(v) for k, v in obj["spaces"].items()})
+        else:
+            raise ValueError(f"Unsupported space ({obj['space']})")
+    # Python data types
+    elif obj["type"] == "python":
+        if obj["space"] == "Discrete":
+            return {obj["value"]}
+        elif obj["space"] == "Box":
+            return obj["value"]
+        elif obj["space"] == "MultiDiscrete":
+            return [{x} for x in obj["value"]]
+        elif obj["space"] == "Tuple":
+            return tuple(map(deserialize_space, obj["value"]))
+        elif obj["space"] == "Dict":
+            return {k: deserialize_space(v) for k, v in obj["value"].items()}
+        else:
+            raise ValueError(f"Unsupported space ({obj['space']})")
+    else:
+        raise ValueError(f"Unsupported type ({obj['type']})")
+
+
+def replace_env_cfg_spaces_with_strings(env_cfg: object) -> object:
+    """Replace spaces objects with their serialized JSON representations in an environment config.
+
+    Args:
+        env_cfg: Environment config instance.
+
+    Returns:
+        Environment config instance with spaces replaced if any.
+    """
+    for attr in ["observation_space", "action_space", "state_space"]:
+        if hasattr(env_cfg, attr):
+            setattr(env_cfg, attr, serialize_space(getattr(env_cfg, attr)))
+    for attr in ["observation_spaces", "action_spaces"]:
+        if hasattr(env_cfg, attr):
+            setattr(env_cfg, attr, {k: serialize_space(v) for k, v in getattr(env_cfg, attr).items()})
+    return env_cfg
+
+
+def replace_strings_with_env_cfg_spaces(env_cfg: object) -> object:
+    """Replace serialized JSON representations with their space specifications in an environment config.
+
+    Args:
+        env_cfg: Environment config instance.
+
+    Returns:
+        Environment config instance with serialized spaces deserialized, if any.
+    """
+    for attr in ["observation_space", "action_space", "state_space"]:
+        if hasattr(env_cfg, attr):
+            setattr(env_cfg, attr, deserialize_space(getattr(env_cfg, attr)))
+    for attr in ["observation_spaces", "action_spaces"]:
+        if hasattr(env_cfg, attr):
+            setattr(env_cfg, attr, {k: deserialize_space(v) for k, v in getattr(env_cfg, attr).items()})
+    return env_cfg
diff --git a/source/isaaclab_experimental/isaaclab_experimental/utils/timer.py b/source/isaaclab_experimental/isaaclab_experimental/utils/timer.py
new file mode 100644
index 000000000000..039558a49347
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/utils/timer.py
@@ -0,0 +1,270 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Sub-module for a timer class that can be used for performance measurements."""
+
+from __future__ import annotations
+
+import math
+import time
+from contextlib import ContextDecorator
+from typing import Any, ClassVar, Literal
+
+import warp as wp
+
+
+class TimerError(Exception):
+    """A custom exception used to report errors in use of :class:`Timer` class."""
+
+    pass
+
+
+class Timer(ContextDecorator):
+    """A timer for performance measurements.
+
+    A class to keep track of time for performance measurement.
+    It allows timing via context managers and decorators as well.
+
+    It uses the `time.perf_counter` function to measure time. This function
+    returns the value (in fractional seconds) of a performance counter, i.e. a
+    clock with the highest resolution available on the system.
+
+    As a regular object:
+
+    .. code-block:: python
+
+        import time
+
+        from isaaclab_experimental.utils.timer import Timer
+
+        timer = Timer()
+        timer.start()
+        time.sleep(1)
+        print(1 <= timer.time_elapsed <= 2)  # Output: True
+
+        time.sleep(1)
+        timer.stop()
+        print(2 <= timer.total_run_time)  # Output: True
+
+    As a context manager:
+
+    .. code-block:: python
+
+        import time
+
+        from isaaclab_experimental.utils.timer import Timer
+
+        with Timer() as timer:
+            time.sleep(1)
+            print(1 <= timer.time_elapsed <= 2)  # Output: True
+
+    Reference: https://gist.github.com/sumeet/1123871
+    """
+
+    timing_info: ClassVar[dict[str, dict[str, float]]] = dict()
+    """Dictionary for storing the elapsed time per timer instances globally.
+
+    This dictionary logs the timer information. The keys are the names given to the timer class
+    at its initialization. If no :attr:`name` is passed to the constructor, no time
+    is recorded in the dictionary.
+    """
+
+    enable = True
+    """Whether to enable the timer."""
+
+    enable_display_output = True
+    """Whether to enable the display output."""
+
+    def __init__(
+        self,
+        msg: str | None = None,
+        name: str | None = None,
+        enable: bool = True,
+        format: Literal["s", "ms", "us", "ns"] = "s",
+    ):
+        """Initializes the timer.
+
+        Args:
+            msg: The message to display when using the timer
+                class in a context manager. Defaults to None.
+            name: The name to use for logging times in a global
+                dictionary. Defaults to None.
+            enable: Whether to enable the timer. Defaults to True.
+            format: The format to use for the elapsed time. Defaults to "s".
+        """
+        self._msg = msg
+        self._name = name
+        self._start_time = None
+        self._stop_time = None
+        self._elapsed_time = None
+        self._enable = enable if Timer.enable else False
+        self._format = format
+
+        # Check if the format is valid
+        assert format in ["s", "ms", "us", "ns"], f"Invalid format, {format} is not in [s, ms, us, ns]"
+        # Convert the format to a multiplier
+        self._multiplier = {
+            "s": 1.0,
+            "ms": 1000.0,
+            "us": 1000000.0,
+            "ns": 1000000000.0,
+        }[format]
+
+        # Online welford's algorithm to compute the mean and std of the elapsed time
+        self._mean = 0.0
+        self._m2 = 0.0
+        self._std = 0.0
+        self._n = 0
+
+    def __str__(self) -> str:
+        """A string representation of the class object.
+
+        Returns:
+            A string containing the elapsed time.
+        """
+        return f"{(self.time_elapsed * self._multiplier):0.6f} {self._format}"
+
+    """
+    Properties
+    """
+
+    @property
+    def time_elapsed(self) -> float:
+        """The number of seconds that have elapsed since this timer started timing.
+
+        Note:
+            This is used for checking how much time has elapsed while the timer is still running.
+        """
+        return time.perf_counter() - self._start_time
+
+    @property
+    def total_run_time(self) -> float:
+        """Seconds between the last start() and stop(); ``None`` until a measurement has been recorded."""
+        return self._elapsed_time
+
+    """
+    Operations
+    """
+
+    def start(self):
+        """Start timing. Does nothing when the timer is disabled; raises TimerError if already running."""
+        if not self._enable:
+            return
+
+        if self._start_time is not None:
+            raise TimerError("Timer is running. Use .stop() to stop it")
+
+        self._start_time = time.perf_counter()
+
+    def stop(self):
+        """Stop timing and, for named timers, publish the updated statistics to ``Timer.timing_info``."""
+        if not self._enable:
+            return
+
+        if self._start_time is None:
+            raise TimerError("Timer is not running. Use .start() to start it")
+
+        # Synchronize the device to make sure we time the whole operation
+        wp.synchronize_device()
+
+        # Get the elapsed time and mark the timer as not running
+        self._stop_time = time.perf_counter()
+        self._elapsed_time = self._stop_time - self._start_time
+        self._start_time = None
+
+        if (self._name is not None) and (self._enable):
+            # Fold the new sample into the running statistics (welford's algorithm)
+            self.update_welford(self._elapsed_time)
+
+            # Publish the timing info under this timer's name
+            Timer.timing_info[self._name] = {
+                "last": self._elapsed_time,
+                "m2": self._m2,
+                "mean": self._mean,
+                "std": self._std,
+                "n": self._n,
+            }
+
+    """
+    Online welford's algorithm to compute the mean and std of the elapsed time
+    """
+
+    def update_welford(self, value: float):
+        """Fold *value* into the running count, mean and M2 stored under this timer's name."""
+        # Continue from the statistics in Timer.timing_info; a KeyError means this is the first sample.
+        try:
+            self._n = Timer.timing_info[self._name]["n"] + 1
+            delta = value - Timer.timing_info[self._name]["mean"]
+            self._mean = Timer.timing_info[self._name]["mean"] + delta / self._n
+            delta2 = value - self._mean
+            self._m2 = Timer.timing_info[self._name]["m2"] + delta * delta2
+        except KeyError:
+            self._n = 1
+            self._mean = value
+            self._m2 = 0.0
+
+        # Population standard deviation (divides by n, not n - 1)
+        self._std = math.sqrt(self._m2 / self._n)
+
+    """
+    Context managers
+    """
+
+    def __enter__(self) -> Timer:
+        """Start timing on entering the ``with`` block and return this `Timer` instance."""
+        self.start()
+        return self
+
+    def __exit__(self, *exc_info: Any):
+        """Stop timing and print the message with the statistics if display output is enabled."""
+        self.stop()
+        # NOTE: mean/std/N are only updated by stop() for named timers
+        if self._enable:
+            if (self._msg is not None) and (Timer.enable_display_output):
+                print(
+                    self._msg,
+                    f"Last: {(self._elapsed_time * self._multiplier):0.6f} {self._format}, "
+                    f"Mean: {(self._mean * self._multiplier):0.6f} {self._format}, "
+                    f"Std: {(self._std * self._multiplier):0.6f} {self._format}, "
+                    f"N: {self._n}",
+                )
+
+    """
+    Static Methods
+    """
+
+    @staticmethod
+    def get_timer_info(name: str) -> float:
+        """Retrieves the last elapsed time logged in the global dictionary
+            based on name.
+
+        Args:
+            name: Name of the entry to be retrieved.
+
+        Raises:
+            TimerError: If name doesn't exist in the log.
+
+        Returns:
+            The most recent elapsed time, in seconds, logged under the name.
+        """
+        if name not in Timer.timing_info:
+            raise TimerError(f"Timer {name} does not exist")
+        return Timer.timing_info.get(name)["last"]
+
+    @staticmethod
+    def get_timer_statistics(name: str) -> dict[str, float]:
+        """Retrieves the statistics logged in the global dictionary under name,
+            as a dictionary with the "mean", "std" and "n" entries.
+
+        Raises:
+            TimerError: If name doesn't exist in the log.
+        """
+
+        if name not in Timer.timing_info:
+            raise TimerError(f"Timer {name} does not exist")
+
+        keys = ["mean", "std", "n"]
+
+        return {k: Timer.timing_info[name][k] for k in keys}
diff --git a/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py b/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py
new file mode 100644
index 000000000000..c7a9d8c525f1
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Warp CUDA graph capture-or-replay utility."""
+
+from collections.abc import Callable
+from typing import Any
+
+import warp as wp
+
+
+class WarpGraphCache:
+    """Caches Warp CUDA graphs by stage name: captures and launches on first call, replays after.
+
+    Usage::
+
+        cache = WarpGraphCache()
+        cache.capture_or_replay("my_stage", my_warp_function)
+        # uncaptured work here ...
+        cache.capture_or_replay("my_stage_post", my_other_function)
+    """
+
+    def __init__(self):
+        self._graphs: dict[str, Any] = {}
+
+    def capture_or_replay(self, stage: str, fn: Callable[[], Any]) -> None:
+        """Capture *fn* into a CUDA graph on the first call (capture only records), then launch the graph."""
+        graph = self._graphs.get(stage)
+        if graph is None:
+            with wp.ScopedCapture() as capture:
+                fn()
+            graph = capture.graph
+            self._graphs[stage] = graph
+        wp.capture_launch(graph)
+
+    def invalidate(self, stage: str | None = None) -> None:
+        """Drop cached graph(s). If *stage* is ``None``, drop all."""
+        if stage is None:
+            self._graphs.clear()
+        else:
+            self._graphs.pop(stage, None)
diff --git a/source/isaaclab_experimental/pyproject.toml b/source/isaaclab_experimental/pyproject.toml
new file mode 100644
index 000000000000..d90ac3536f16
--- /dev/null
+++ b/source/isaaclab_experimental/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools", "wheel", "toml"]
+build-backend = "setuptools.build_meta"
diff --git a/source/isaaclab_experimental/setup.py b/source/isaaclab_experimental/setup.py
new file mode 100644
index 000000000000..c84086f0fad1
--- /dev/null
+++ b/source/isaaclab_experimental/setup.py
@@ -0,0 +1,53 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Installation script for the 'isaaclab_experimental' python package."""
+
+import os
+
+import toml
+from setuptools import find_packages, setup
+
+# Obtain the extension data from the extension.toml file
+EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__))
+# Read the extension.toml file
+EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml"))
+
+# Minimum dependencies required prior to installation
+INSTALL_REQUIRES = [
+    # generic
+    "numpy>2",
+    "warp-lang>=1.9.0.dev20250825",  # TODO: update to 1.11.0
+    "torch>=2.7",
+    "prettytable==3.3.0",
+    "toml",
+]
+
+# NOTE(review): pip no longer honors `dependency_links`; confirm this index URL still has an effect.
+PYTORCH_INDEX_URL = ["https://download.pytorch.org/whl/cu118"]
+
+# Installation operation
+setup(
+    name="isaaclab_experimental",
+    author="Isaac Lab Project Developers",
+    maintainer="Isaac Lab Project Developers",
+    url=EXTENSION_TOML_DATA["package"]["repository"],
+    version=EXTENSION_TOML_DATA["package"]["version"],
+    description=EXTENSION_TOML_DATA["package"]["description"],
+    keywords=EXTENSION_TOML_DATA["package"]["keywords"],
+    license="BSD-3-Clause",
+    include_package_data=True,
+    python_requires=">=3.10",
+    install_requires=INSTALL_REQUIRES,
+    dependency_links=PYTORCH_INDEX_URL,
+    packages=find_packages(),
+    classifiers=[
+        "Natural Language :: English",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Isaac Sim :: 5.0.0",
+    ],
+    zip_safe=False,
+)
diff --git a/source/isaaclab_newton/setup.py b/source/isaaclab_newton/setup.py
index 2ae60338168d..8ac8b5700159 100644
--- a/source/isaaclab_newton/setup.py
+++ b/source/isaaclab_newton/setup.py
@@ -22,7 +22,7 @@
     # newton
     "mujoco==3.5.0",
     "mujoco-warp==3.5.0.2",
-    "newton==1.0.0rc1",
+    "newton==1.0.0rc3",
     "PyOpenGL-accelerate==3.1.10",
 ]
 
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
index e7e52ca43a96..7e753c0f7a2a 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
@@ -47,7 +47,16 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N
         # NOTE: import here (not at module level) to avoid loading heavy env classes before Isaac Sim is initialized.
         from isaaclab.envs import DirectRLEnv, ManagerBasedEnv, ManagerBasedRLEnv
 
-        if not isinstance(env.unwrapped, (ManagerBasedRLEnv, ManagerBasedEnv, DirectRLEnv)):
+        try:
+            from isaaclab_experimental.envs import DirectRLEnvWarp
+        except ImportError:
+            DirectRLEnvWarp = None
+
+        allowed_types = (ManagerBasedRLEnv, ManagerBasedEnv, DirectRLEnv)
+        if DirectRLEnvWarp is not None:
+            allowed_types += (DirectRLEnvWarp,)
+
+        if not isinstance(env.unwrapped, allowed_types):
             raise ValueError(
                 "The environment must be inherited from ManagerBasedRLEnv or DirectRLEnv. Environment type:"
                 f" {type(env)}"
diff --git a/source/isaaclab_tasks_experimental/config/extension.toml b/source/isaaclab_tasks_experimental/config/extension.toml
new file mode 100644
index 000000000000..75de8d6da0ef
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/config/extension.toml
@@ -0,0 +1,22 @@
+[package]
+
+# Note: Semantic Versioning is used: https://semver.org/
+version = "0.0.1"
+
+# Description
+title = "Experimental environments for IsaacLab"
+description="Extension containing suite of experimental environments for robot learning."
+readme  = "docs/README.md"
+repository = "https://github.com/isaac-sim/IsaacLab"
+category = "robotics"
+keywords = ["robotics", "rl", "il", "learning"]
+
+[dependencies]
+"isaaclab" = {}
+"isaaclab_assets" = {}
+
+[core]
+reloadable = false
+
+[[python.module]]
+name = "isaaclab_tasks_experimental"
diff --git a/source/isaaclab_tasks_experimental/docs/README.md b/source/isaaclab_tasks_experimental/docs/README.md
new file mode 100644
index 000000000000..d9e681518d64
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/docs/README.md
@@ -0,0 +1,3 @@
+# Isaac Lab: Experimental Environment Suite
+
+Experimental environments for robot learning built on top of Isaac Lab.
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py
new file mode 100644
index 000000000000..918c41d73d7b
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Package containing task implementations for various robotic environments."""
+
+import os
+import toml
+
+# Conveniences to other module directories via relative paths
+ISAACLAB_TASKS_EXPERIMENTAL_EXT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
+"""Path to the extension source directory."""
+
+ISAACLAB_TASKS_EXPERIMENTAL_METADATA = toml.load(
+    os.path.join(ISAACLAB_TASKS_EXPERIMENTAL_EXT_DIR, "config", "extension.toml")
+)
+"""Extension metadata dictionary parsed from the extension.toml file."""
+
+# Configure the module-level variables
+__version__ = ISAACLAB_TASKS_EXPERIMENTAL_METADATA["package"]["version"]
+
+##
+# Register Gym environments.
+##
+
+from isaaclab_tasks.utils import import_packages
+
+# The blacklist is used to prevent importing configs from the listed sub-packages
+_BLACKLIST_PKGS = ["utils", ".mdp"]
+# Import all configs in this package (presumably this runs the sub-packages' gym.register() calls; verify)
+import_packages(__name__, _BLACKLIST_PKGS)
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py
new file mode 100644
index 000000000000..3e2b7945ebde
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Direct workflow environments.
+"""
+
+import gymnasium as gym
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py
new file mode 100644
index 000000000000..c954081b9c54
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Allegro Inhand Manipulation environment.
+"""
+
+import gymnasium as gym
+
+##
+# Register Gym environments (env code from this package, agent configs from the stable task package).
+##
+
+inhand_task_entry = "isaaclab_tasks_experimental.direct.inhand_manipulation"
+stable_agents = "isaaclab_tasks.direct.allegro_hand.agents"
+
+gym.register(
+    id="Isaac-Repose-Cube-Allegro-Direct-Warp-v0",
+    entry_point=f"{inhand_task_entry}.inhand_manipulation_warp_env:InHandManipulationWarpEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.allegro_hand_warp_env_cfg:AllegroHandWarpEnvCfg",
+        "rl_games_cfg_entry_point": f"{stable_agents}:rl_games_ppo_cfg.yaml",
+        "rsl_rl_cfg_entry_point": f"{stable_agents}.rsl_rl_ppo_cfg:AllegroHandPPORunnerCfg",
+        "skrl_cfg_entry_point": f"{stable_agents}:skrl_ppo_cfg.yaml",
+    },
+)
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py
new file mode 100644
index 000000000000..2d607d475329
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py
@@ -0,0 +1,136 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+
+from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg  # , RigidObjectCfg
+from isaaclab.envs import DirectRLEnvCfg
+from isaaclab.markers import VisualizationMarkersCfg
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.sim import SimulationCfg
+from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg
+from isaaclab.utils import configclass
+from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
+
+from isaaclab_assets.robots.allegro import ALLEGRO_HAND_CFG
+
+
+@configclass
+class AllegroHandWarpEnvCfg(DirectRLEnvCfg):
+    """Configuration of the Allegro hand cube-reposing environment for the Warp-based direct workflow."""
+    # env
+    decimation = 4
+    episode_length_s = 10.0
+    action_space = 16
+    observation_space = 124  # size used with the "full" observation type (see obs_type)
+    state_space = 0
+    asymmetric_obs = False
+    obs_type = "full"
+    # physics: MJWarp solver settings, wrapped in the Newton physics config below
+    solver_cfg = MJWarpSolverCfg(
+        solver="newton",
+        integrator="implicitfast",
+        njmax=80,
+        nconmax=70,
+        impratio=10.0,
+        cone="elliptic",
+        update_data_interval=2,
+        iterations=100,
+        ls_iterations=15,
+        ls_parallel=False,
+        # save_to_mjcf="AllegroHand.xml",
+    )
+    newton_cfg = NewtonCfg(
+        solver_cfg=solver_cfg,
+        num_substeps=2,
+        debug_mode=False,
+    )
+    # simulation
+    sim: SimulationCfg = SimulationCfg(
+        dt=1 / 120,
+        render_interval=decimation,
+        physics_material=RigidBodyMaterialCfg(
+            static_friction=1.0,
+            dynamic_friction=1.0,
+        ),
+        physics=newton_cfg,
+    )
+    # robot
+    robot_cfg: ArticulationCfg = ALLEGRO_HAND_CFG.replace(prim_path="/World/envs/env_.*/Robot")
+
+    actuated_joint_names = [
+        "index_joint_0",
+        "middle_joint_0",
+        "ring_joint_0",
+        "thumb_joint_0",
+        "index_joint_1",
+        "index_joint_2",
+        "index_joint_3",
+        "middle_joint_1",
+        "middle_joint_2",
+        "middle_joint_3",
+        "ring_joint_1",
+        "ring_joint_2",
+        "ring_joint_3",
+        "thumb_joint_1",
+        "thumb_joint_2",
+        "thumb_joint_3",
+    ]
+    fingertip_body_names = [
+        "index_link_3",
+        "middle_link_3",
+        "ring_link_3",
+        "thumb_link_3",
+    ]
+
+    # in-hand object (configured as an articulation with no joints and no actuators)
+    object_cfg: ArticulationCfg = ArticulationCfg(
+        prim_path="/World/envs/env_.*/object",
+        spawn=sim_utils.UsdFileCfg(
+            usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd",
+            mass_props=sim_utils.MassPropertiesCfg(density=400.0),
+            scale=(1.2, 1.2, 1.2),
+        ),
+        # FIXME: ArticulationCfg appears to have a bug when handling an empty joint list
+        init_state=ArticulationCfg.InitialStateCfg(
+            pos=(0.0, -0.17, 0.565), rot=(0.0, 0.0, 0.0, 1.0), joint_pos={}, joint_vel={}
+        ),
+        actuators={},
+        articulation_root_prim_path="",
+    )
+    # goal object
+    goal_object_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg(
+        prim_path="/Visuals/goal_marker",
+        markers={
+            "goal": sim_utils.UsdFileCfg(
+                usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd",
+                scale=(1.2, 1.2, 1.2),
+            )
+        },
+    )
+    # scene
+    scene: InteractiveSceneCfg = InteractiveSceneCfg(
+        num_envs=8192, env_spacing=0.75, replicate_physics=True, clone_in_fabric=True
+    )
+    # reset
+    reset_position_noise = 0.01  # range of position at reset
+    reset_dof_pos_noise = 0.2  # range of dof pos at reset
+    reset_dof_vel_noise = 0.0  # range of dof vel at reset
+    # reward scales
+    dist_reward_scale = -10.0
+    rot_reward_scale = 1.0
+    rot_eps = 0.1
+    action_penalty_scale = -0.0002
+    reach_goal_bonus = 250
+    fall_penalty = 0
+    fall_dist = 0.24
+    vel_obs_scale = 0.2
+    success_tolerance = 0.2
+    max_consecutive_success = 0
+    av_factor = 0.1
+    act_moving_average = 1.0
+    force_torque_obs_scale = 10.0
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py
new file mode 100644
index 000000000000..460a30569089
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py
new file mode 100644
index 000000000000..fca92ac1e514
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py
@@ -0,0 +1,980 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import TYPE_CHECKING
+
+import torch
+import warp as wp
+from isaaclab_experimental.envs import DirectRLEnvWarp
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import Articulation  # , RigidObject
+from isaaclab.markers import VisualizationMarkers
+from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
+
+if TYPE_CHECKING:
+    from isaaclab_tasks_experimental.direct.allegro_hand.allegro_hand_warp_env_cfg import AllegroHandWarpEnvCfg
+
+
+@wp.kernel
+def initialize_rng_state(
+    # input: seed shared by every thread
+    seed: wp.int32,
+    # output: one RNG state per thread index
+    state: wp.array(dtype=wp.uint32),
+):
+    env_id = wp.tid()
+    state[env_id] = wp.rand_init(seed, wp.int32(env_id))
+
+
+@wp.kernel
+def apply_actions_to_targets(
+    # input
+    actions: wp.array2d(dtype=wp.float32),
+    lower_limits: wp.array2d(dtype=wp.float32),
+    upper_limits: wp.array2d(dtype=wp.float32),
+    actuated_dof_indices: wp.array(dtype=wp.int32),
+    act_moving_average: wp.float32,
+    # input/output (read for smoothing, then overwritten with the new target)
+    prev_targets: wp.array2d(dtype=wp.float32),
+    # output
+    cur_targets: wp.array2d(dtype=wp.float32),
+):
+    env_id, i = wp.tid()
+    dof_id = actuated_dof_indices[i]
+
+    # clamp the action to [-1, 1] and scale it to the target range
+    a = wp.clamp(actions[env_id, i], wp.float32(-1.0), wp.float32(1.0))
+    lower = lower_limits[env_id, dof_id]
+    upper = upper_limits[env_id, dof_id]
+    t = scale(a, lower, upper)
+
+    # blend with the previous target (weight act_moving_average on the new one), then clamp to [lower, upper]
+    t = act_moving_average * t + (wp.float32(1.0) - act_moving_average) * prev_targets[env_id, dof_id]
+    t = wp.clamp(t, lower, upper)
+
+    # update targets
+    cur_targets[env_id, dof_id] = t
+    prev_targets[env_id, dof_id] = t
+
+
+@wp.kernel
+def reset_target_pose(
+    # input
+    env_mask: wp.array(dtype=wp.bool),
+    x_unit_vec: wp.vec3f,
+    y_unit_vec: wp.vec3f,
+    env_origins: wp.array(dtype=wp.vec3f),
+    goal_pos: wp.array(dtype=wp.vec3f),
+    # input/output (incremented by one after every random draw)
+    rng_state: wp.array(dtype=wp.uint32),
+    # output
+    goal_rot: wp.array(dtype=wp.quatf),
+    reset_goal_buf: wp.array(dtype=wp.bool),
+    goal_pos_w: wp.array(dtype=wp.vec3f),
+):
+    env_id = wp.tid()
+    if env_mask[env_id]:
+        rand0 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+        rand1 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+
+        goal_rot[env_id] = randomize_rotation(rand0, rand1, x_unit_vec, y_unit_vec)
+        reset_goal_buf[env_id] = False
+
+    # Warp-native addition: goal position in world frame (written for every env, not only the masked ones).
+    goal_pos_w[env_id] = goal_pos[env_id] + env_origins[env_id]
+
+
+@wp.kernel
+def reset_object(
+    # input
+    default_root_pose: wp.array(dtype=wp.transformf),
+    env_origins: wp.array(dtype=wp.vec3f),
+    reset_position_noise: wp.float32,
+    x_unit_vec: wp.vec3f,
+    y_unit_vec: wp.vec3f,
+    env_mask: wp.array(dtype=wp.bool),
+    # input/output (incremented by one after every random draw)
+    rng_state: wp.array(dtype=wp.uint32),
+    # output
+    root_pose_w: wp.array(dtype=wp.transformf),
+    root_vel_w: wp.array(dtype=wp.spatial_vectorf),
+):
+    env_id = wp.tid()
+    if env_mask[env_id]:
+        nx = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+        ny = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+        nz = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+
+        pos_noise = reset_position_noise * wp.vec3f(nx, ny, nz)
+        base_pos = wp.transform_get_translation(default_root_pose[env_id])
+        pos_w = base_pos + env_origins[env_id] + pos_noise
+
+        rand0 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+        rand1 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+        rot_w = randomize_rotation(rand0, rand1, x_unit_vec, y_unit_vec)
+
+        # Direct write; should be equivalent to write_root_pose_to_sim / write_root_velocity_to_sim (consider them)
+        root_pose_w[env_id] = wp.transform(pos_w, rot_w)
+        root_vel_w[env_id] = wp.spatial_vectorf(
+            wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0)
+        )
+
+
+@wp.kernel
+def reset_hand(
+    # input
+    default_joint_pos: wp.array2d(dtype=wp.float32),
+    default_joint_vel: wp.array2d(dtype=wp.float32),
+    lower_limits: wp.array2d(dtype=wp.float32),
+    upper_limits: wp.array2d(dtype=wp.float32),
+    reset_dof_pos_noise: wp.float32,
+    reset_dof_vel_noise: wp.float32,
+    env_mask: wp.array(dtype=wp.bool),
+    num_dofs: wp.int32,
+    # input/output (incremented by one after every random draw)
+    rng_state: wp.array(dtype=wp.uint32),
+    # output
+    joint_pos: wp.array2d(dtype=wp.float32),
+    joint_vel: wp.array2d(dtype=wp.float32),
+    prev_targets: wp.array2d(dtype=wp.float32),
+    cur_targets: wp.array2d(dtype=wp.float32),
+    hand_dof_targets: wp.array2d(dtype=wp.float32),
+):
+    env_id = wp.tid()
+    if env_mask[env_id]:
+        # Each env runs sequentially inside this kernel (avoids RNG races across DOFs).
+        for dof_id in range(num_dofs):
+            dof_pos_noise = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+            rng_state[env_id] += wp.uint32(1)
+
+            delta_max = upper_limits[env_id, dof_id] - default_joint_pos[env_id, dof_id]
+            delta_min = lower_limits[env_id, dof_id] - default_joint_pos[env_id, dof_id]
+            rand_delta = delta_min + (delta_max - delta_min) * 0.5 * dof_pos_noise
+            pos = default_joint_pos[env_id, dof_id] + reset_dof_pos_noise * rand_delta
+
+            dof_vel_noise = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+            rng_state[env_id] += wp.uint32(1)
+            vel = default_joint_vel[env_id, dof_id] + reset_dof_vel_noise * dof_vel_noise
+
+            # The two writes below should be equivalent to:
+            # self.hand.write_joint_state_to_sim(dof_pos, dof_vel, env_ids=env_ids)
+            joint_pos[env_id, dof_id] = pos
+            joint_vel[env_id, dof_id] = vel
+
+            prev_targets[env_id, dof_id] = pos
+            cur_targets[env_id, dof_id] = pos
+            hand_dof_targets[env_id, dof_id] = pos
+
+
+@wp.kernel
+def reset_successes(
+    # input: only entries whose mask is set are touched
+    env_mask: wp.array(dtype=wp.bool),
+    # output: zeroed for the masked entries
+    successes: wp.array(dtype=wp.float32),
+):
+    env_id = wp.tid()
+    if env_mask[env_id]:
+        successes[env_id] = wp.float32(0.0)
+
+
+@wp.kernel
+def compute_intermediate_values(
+    # input
+    body_pos_w: wp.array2d(dtype=wp.vec3f),
+    body_quat_w: wp.array2d(dtype=wp.quatf),
+    body_vel_w: wp.array2d(dtype=wp.spatial_vectorf),
+    finger_bodies: wp.array(dtype=wp.int32),
+    env_origins: wp.array(dtype=wp.vec3f),
+    object_root_pose_w: wp.array(dtype=wp.transformf),
+    object_root_vel_w: wp.array(dtype=wp.spatial_vectorf),
+    num_fingertips: wp.int32,
+    # output
+    fingertip_pos: wp.array2d(dtype=wp.vec3f),
+    fingertip_rot: wp.array2d(dtype=wp.quatf),
+    fingertip_velocities: wp.array2d(dtype=wp.spatial_vectorf),
+    object_pose: wp.array(dtype=wp.transformf),
+    object_vels: wp.array(dtype=wp.spatial_vectorf),
+):
+    env_id = wp.tid()
+    # Fingertip positions are stored relative to the env origin; rotations and velocities are copied unchanged.
+    for i in range(num_fingertips):
+        body_id = finger_bodies[i]
+        fingertip_pos[env_id, i] = body_pos_w[env_id, body_id] - env_origins[env_id]
+        fingertip_rot[env_id, i] = body_quat_w[env_id, body_id]
+        fingertip_velocities[env_id, i] = body_vel_w[env_id, body_id]
+
+    # Store object pose in env-local frame (translation only; orientation unchanged).
+    pos_w = wp.transform_get_translation(object_root_pose_w[env_id])
+    pos = pos_w - env_origins[env_id]
+    rot = wp.transform_get_rotation(object_root_pose_w[env_id])
+    object_pose[env_id] = wp.transform(pos, rot)
+    object_vels[env_id] = object_root_vel_w[env_id]
+
+
+@wp.kernel
+def get_dones(
+    # input
+    max_episode_length: wp.int32,
+    object_pose: wp.array(dtype=wp.transformf),
+    in_hand_pos: wp.array(dtype=wp.vec3f),
+    goal_rot: wp.array(dtype=wp.quatf),
+    fall_dist: wp.float32,
+    success_tolerance: wp.float32,
+    max_consecutive_success: wp.int32,
+    successes: wp.array(dtype=wp.float32),
+    # input/output
+    episode_length_buf: wp.array(dtype=wp.int32),
+    # output
+    out_of_reach: wp.array(dtype=wp.bool),
+    time_out: wp.array(dtype=wp.bool),
+    reset: wp.array(dtype=wp.bool),
+):
+    env_id = wp.tid()
+
+    object_pos = wp.transform_get_translation(object_pose[env_id])
+    object_rot = wp.transform_get_rotation(object_pose[env_id])
+    # out of reach: the object is at least fall_dist away from in_hand_pos
+    goal_dist = wp.length(object_pos - in_hand_pos[env_id])
+    out_of_reach[env_id] = goal_dist >= fall_dist
+
+    max_success_reached = False
+    if max_consecutive_success > 0:
+        # Reset progress (episode length buf) on goal envs if max_consecutive_success > 0
+        rot_dist = rotation_distance(object_rot, goal_rot[env_id])
+        if wp.abs(rot_dist) <= success_tolerance:
+            episode_length_buf[env_id] = 0
+        max_success_reached = successes[env_id] >= wp.float32(max_consecutive_success)
+    # time out on the last episode step, or once max_consecutive_success successes were reached
+    time_out[env_id] = episode_length_buf[env_id] >= (max_episode_length - 1) or max_success_reached
+    reset[env_id] = out_of_reach[env_id] or time_out[env_id]
+
+
+@wp.kernel
+def compute_reduced_observations(
+    # input
+    fingertip_pos: wp.array2d(dtype=wp.vec3f),
+    object_pose: wp.array(dtype=wp.transformf),
+    goal_rot: wp.array(dtype=wp.quatf),
+    actions: wp.array2d(dtype=wp.float32),
+    num_fingertips: wp.int32,
+    action_dim: wp.int32,
+    # output
+    observations: wp.array2d(dtype=wp.float32),
+):
+    env_id = wp.tid()
+
+    obj_pos = wp.transform_get_translation(object_pose[env_id])
+    obj_rot = wp.transform_get_rotation(object_pose[env_id])
+    # row layout: fingertip positions (3 each), object position (3), relative rotation (4), actions
+    idx = int(0)
+    for i in range(num_fingertips):
+        observations[env_id, idx + 0] = fingertip_pos[env_id, i][0]
+        observations[env_id, idx + 1] = fingertip_pos[env_id, i][1]
+        observations[env_id, idx + 2] = fingertip_pos[env_id, i][2]
+        idx += 3
+
+    observations[env_id, idx + 0] = obj_pos[0]
+    observations[env_id, idx + 1] = obj_pos[1]
+    observations[env_id, idx + 2] = obj_pos[2]
+    idx += 3
+
+    rel = obj_rot * wp.quat_inverse(goal_rot[env_id])
+    observations[env_id, idx + 0] = rel[0]
+    observations[env_id, idx + 1] = rel[1]
+    observations[env_id, idx + 2] = rel[2]
+    observations[env_id, idx + 3] = rel[3]
+    idx += 4
+
+    for i in range(action_dim):
+        observations[env_id, idx + i] = actions[env_id, i]
+
+
+@wp.kernel
+def compute_full_observations(
+    # input
+    hand_dof_pos: wp.array2d(dtype=wp.float32),
+    hand_dof_vel: wp.array2d(dtype=wp.float32),
+    hand_dof_lower_limits: wp.array2d(dtype=wp.float32),
+    hand_dof_upper_limits: wp.array2d(dtype=wp.float32),
+    vel_obs_scale: wp.float32,
+    object_pose: wp.array(dtype=wp.transformf),
+    object_vels: wp.array(dtype=wp.spatial_vectorf),
+    in_hand_pos: wp.array(dtype=wp.vec3f),
+    goal_rot: wp.array(dtype=wp.quatf),
+    fingertip_pos: wp.array2d(dtype=wp.vec3f),
+    fingertip_rot: wp.array2d(dtype=wp.quatf),
+    fingertip_velocities: wp.array2d(dtype=wp.spatial_vectorf),
+    actions: wp.array2d(dtype=wp.float32),
+    num_hand_dofs: wp.int32,
+    num_fingertips: wp.int32,
+    action_dim: wp.int32,
+    # output
+    observations: wp.array2d(dtype=wp.float32),
+):
+    env_id = wp.tid()
+
+    # hand
+    for i in range(num_hand_dofs):
+        observations[env_id, i] = unscale(
+            hand_dof_pos[env_id, i], hand_dof_lower_limits[env_id, i], hand_dof_upper_limits[env_id, i]
+        )
+
+    offset = num_hand_dofs
+    for i in range(num_hand_dofs):
+        observations[env_id, offset + i] = vel_obs_scale * hand_dof_vel[env_id, i]
+    offset += num_hand_dofs
+
+    # object
+    obj_pos = wp.transform_get_translation(object_pose[env_id])
+    obj_rot = wp.transform_get_rotation(object_pose[env_id])
+
+    observations[env_id, offset + 0] = obj_pos[0]
+    observations[env_id, offset + 1] = obj_pos[1]
+    observations[env_id, offset + 2] = obj_pos[2]
+    offset += 3
+
+    observations[env_id, offset + 0] = obj_rot[0]
+    observations[env_id, offset + 1] = obj_rot[1]
+    observations[env_id, offset + 2] = obj_rot[2]
+    observations[env_id, offset + 3] = obj_rot[3]
+    offset += 4
+
+    # spatial_vectorf layout: [0:3]=angular, [3:6]=linear
+    # torch reference order: linear (unscaled) first, then angular (scaled)
+    observations[env_id, offset + 0] = object_vels[env_id][3]
+    observations[env_id, offset + 1] = object_vels[env_id][4]
+    observations[env_id, offset + 2] = object_vels[env_id][5]
+    offset += 3
+
+    observations[env_id, offset + 0] = vel_obs_scale * object_vels[env_id][0]
+    observations[env_id, offset + 1] = vel_obs_scale * object_vels[env_id][1]
+    observations[env_id, offset + 2] = vel_obs_scale * object_vels[env_id][2]
+    offset += 3
+
+    # goal
+    observations[env_id, offset + 0] = in_hand_pos[env_id][0]
+    observations[env_id, offset + 1] = in_hand_pos[env_id][1]
+    observations[env_id, offset + 2] = in_hand_pos[env_id][2]
+    offset += 3
+
+    observations[env_id, offset + 0] = goal_rot[env_id][0]
+    observations[env_id, offset + 1] = goal_rot[env_id][1]
+    observations[env_id, offset + 2] = goal_rot[env_id][2]
+    observations[env_id, offset + 3] = goal_rot[env_id][3]
+    offset += 4
+
+    rel = obj_rot * wp.quat_inverse(goal_rot[env_id])
+    observations[env_id, offset + 0] = rel[0]
+    observations[env_id, offset + 1] = rel[1]
+    observations[env_id, offset + 2] = rel[2]
+    observations[env_id, offset + 3] = rel[3]
+    offset += 4
+
+    # fingertips
+    for i in range(num_fingertips):
+        observations[env_id, offset + 0] = fingertip_pos[env_id, i][0]
+        observations[env_id, offset + 1] = fingertip_pos[env_id, i][1]
+        observations[env_id, offset + 2] = fingertip_pos[env_id, i][2]
+        offset += 3
+
+    for i in range(num_fingertips):
+        observations[env_id, offset + 0] = fingertip_rot[env_id, i][0]
+        observations[env_id, offset + 1] = fingertip_rot[env_id, i][1]
+        observations[env_id, offset + 2] = fingertip_rot[env_id, i][2]
+        observations[env_id, offset + 3] = fingertip_rot[env_id, i][3]
+        offset += 4
+
+    for i in range(num_fingertips):
+        for j in range(6):
+            observations[env_id, offset + j] = fingertip_velocities[env_id, i][j]
+        offset += 6
+
+    # actions
+    for i in range(action_dim):
+        observations[env_id, offset + i] = actions[env_id, i]
+
+
+@wp.kernel
+def sanitize_and_print_once(
+    # input/output
+    obs: wp.array(dtype=wp.float32),
+    printed_flag: wp.array(dtype=wp.int32),
+):
+    i = wp.tid()
+    v = obs[i]
+
+    if not wp.isfinite(v):
+        # Try to claim the "print token"
+        if wp.atomic_cas(printed_flag, 0, 0, 1) == 0:
+            wp.printf("Non-finite values in observations")
+
+        obs[i] = wp.float32(0.0)
+
+
+@wp.kernel
+def compute_rewards(
+    # input
+    reset_buf: wp.array(dtype=wp.bool),
+    object_pose: wp.array(dtype=wp.transformf),
+    target_pos: wp.array(dtype=wp.vec3f),
+    target_rot: wp.array(dtype=wp.quatf),
+    dist_reward_scale: wp.float32,
+    rot_reward_scale: wp.float32,
+    rot_eps: wp.float32,
+    actions: wp.array2d(dtype=wp.float32),
+    action_penalty_scale: wp.float32,
+    success_tolerance: wp.float32,
+    reach_goal_bonus: wp.float32,
+    fall_dist: wp.float32,
+    fall_penalty: wp.float32,
+    action_dim: wp.int32,
+    # input/output
+    reset_goal_buf: wp.array(dtype=wp.bool),
+    successes: wp.array(dtype=wp.float32),
+    num_resets_out: wp.array(dtype=wp.float32),
+    finished_cons_successes_out: wp.array(dtype=wp.float32),
+    # output
+    reward_out: wp.array(dtype=wp.float32),
+):
+    env_id = wp.tid()
+
+    obj_pos = wp.transform_get_translation(object_pose[env_id])
+    obj_rot = wp.transform_get_rotation(object_pose[env_id])
+
+    goal_dist = wp.length(obj_pos - target_pos[env_id])
+    rot_dist = rotation_distance(obj_rot, target_rot[env_id])
+
+    dist_rew = goal_dist * dist_reward_scale
+    rot_rew = wp.float32(1.0) / (wp.abs(rot_dist) + rot_eps) * rot_reward_scale
+
+    action_penalty = wp.float32(0.0)
+    for i in range(action_dim):
+        action_penalty += actions[env_id, i] * actions[env_id, i]
+
+    # Total reward is: position distance + orientation alignment + action regularization + success bonus + fall penalty
+    reward = dist_rew + rot_rew + action_penalty * action_penalty_scale
+
+    # Find out which envs hit the goal and update successes count
+    reached = wp.abs(rot_dist) <= success_tolerance
+    goal_resets = reached or reset_goal_buf[env_id]
+    reset_goal_buf[env_id] = goal_resets
+    if goal_resets:
+        successes[env_id] = successes[env_id] + wp.float32(1.0)
+
+    # Success bonus: orientation is within `success_tolerance` of goal orientation
+    if goal_resets:
+        reward = reward + reach_goal_bonus
+
+    # Fall penalty: distance to the goal is larger than a threshold
+    if goal_dist >= fall_dist:
+        reward = reward + fall_penalty
+
+    # Consecutive-successes stats (mirrors Torch env):
+    #   resets = torch.where(goal_dist >= fall_dist, ones_like(reset_buf), reset_buf)
+    resets = (goal_dist >= fall_dist) or reset_buf[env_id]
+    if resets:
+        wp.atomic_add(num_resets_out, 0, wp.float32(1.0))
+        wp.atomic_add(finished_cons_successes_out, 0, successes[env_id])
+
+    reward_out[env_id] = reward
+
+
+@wp.kernel
+def update_consecutive_successes_from_stats(
+    # input
+    num_resets: wp.array(dtype=wp.float32),
+    finished_cons_successes: wp.array(dtype=wp.float32),
+    av_factor: wp.float32,
+    # input/output
+    consecutive_successes: wp.array(dtype=wp.float32),
+):
+    """Finalize the Torch env's EMA update for consecutive_successes and clear the accumulators."""
+    # single-thread kernel (dim=1)
+    n = num_resets[0]
+    prev = consecutive_successes[0]
+    if n > wp.float32(0.0):
+        consecutive_successes[0] = av_factor * (finished_cons_successes[0] / n) + (wp.float32(1.0) - av_factor) * prev
+
+
+@wp.func  # affine map taking x = -1 to `lower` and x = +1 to `upper`
+def scale(x: wp.float32, lower: wp.float32, upper: wp.float32) -> wp.float32:
+    return wp.float32(0.5) * (x + wp.float32(1.0)) * (upper - lower) + lower
+
+
+@wp.func  # affine map taking x = lower to -1 and x = upper to +1 (divides by zero when upper == lower)
+def unscale(x: wp.float32, lower: wp.float32, upper: wp.float32) -> wp.float32:
+    return (wp.float32(2.0) * x - upper - lower) / (upper - lower)
+
+
+@wp.func  # product of a rotation by rand0 * pi about x_axis and a rotation by rand1 * pi about y_axis
+def randomize_rotation(rand0: wp.float32, rand1: wp.float32, x_axis: wp.vec3f, y_axis: wp.vec3f) -> wp.quatf:
+    return wp.quat_from_axis_angle(x_axis, rand0 * wp.pi) * wp.quat_from_axis_angle(y_axis, rand1 * wp.pi)
+
+
+@wp.func
+def rotation_distance(object_rot: wp.quatf, target_rot: wp.quatf) -> wp.float32:
+    # Orientation alignment for the cube in hand and goal cube
+    quat_diff = object_rot * wp.quat_inverse(target_rot)
+    # Match Torch env convention: uses indices [1:4] for the vector part (see `rotation_distance` in Torch env).
+    v_norm = wp.sqrt(quat_diff[1] * quat_diff[1] + quat_diff[2] * quat_diff[2] + quat_diff[3] * quat_diff[3])
+    v_norm = wp.min(v_norm, wp.float32(1.0))
+    return wp.float32(2.0) * wp.asin(v_norm)
+
+
+class InHandManipulationWarpEnv(DirectRLEnvWarp):
+    cfg: AllegroHandWarpEnvCfg  # | ShadowHandWarpEnvCfg
+
+    # NOTE: only AllegroHandWarpEnvCfg is accepted for now; the ShadowHandWarpEnvCfg union is commented out above.
+    def __init__(self, cfg: AllegroHandWarpEnvCfg, render_mode: str | None = None, **kwargs):
+        super().__init__(cfg, render_mode, **kwargs)
+
+        # ---------------------------------------------------------------------
+        # Constants
+        # ---------------------------------------------------------------------
+
+        # number of hand joints; sizes the joint target buffers and is passed to the reset/observation kernels
+        self.num_hand_dofs = self.hand.num_joints
+
+        # list of actuated joints
+        actuated_dof_indices: list[int] = list()
+        for joint_name in cfg.actuated_joint_names:
+            actuated_dof_indices.append(self.hand.joint_names.index(joint_name))
+        actuated_dof_indices.sort()
+        self.num_actuated_dofs = len(actuated_dof_indices)
+
+        # Warp index/mask helpers for kernels and articulation APIs.
+        self.actuated_dof_indices = wp.array(actuated_dof_indices, dtype=wp.int32, device=self.device)
+        actuated_mask = [False] * self.num_hand_dofs
+        for idx in actuated_dof_indices:
+            actuated_mask[idx] = True
+        self.actuated_dof_mask = wp.array(actuated_mask, dtype=wp.bool, device=self.device)
+
+        # finger bodies
+        finger_bodies: list[int] = list()
+        for body_name in self.cfg.fingertip_body_names:
+            finger_bodies.append(self.hand.body_names.index(body_name))
+        finger_bodies.sort()
+        self.num_fingertips = len(finger_bodies)
+        self.finger_bodies = wp.array(finger_bodies, dtype=wp.int32, device=self.device)
+
+        # joint limits
+        self.hand_dof_lower_limits = self.hand.data.joint_pos_limits_lower
+        self.hand_dof_upper_limits = self.hand.data.joint_pos_limits_upper
+
+        # unit vectors
+        self.x_unit_vec = wp.vec3f(1.0, 0.0, 0.0)
+        self.y_unit_vec = wp.vec3f(0.0, 1.0, 0.0)
+        self.z_unit_vec = wp.vec3f(0.0, 0.0, 1.0)
+
+        # Per-env origins (Warp view for kernels; Torch env uses `self.scene.env_origins` directly).
+        self.env_origins = wp.from_torch(self.scene.env_origins, dtype=wp.vec3f)
+
+        # ---------------------------------------------------------------------
+        # Warp buffers
+        # ---------------------------------------------------------------------
+
+        # buffers for position targets
+        self.hand_dof_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device)
+        self.prev_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device)
+        self.cur_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device)
+
+        # track goal resets
+        self.reset_goal_buf = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device)
+        # used to compare object position
+        self.in_hand_pos = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device)
+        # default goal positions
+        self.goal_rot = wp.zeros(self.num_envs, dtype=wp.quatf, device=self.device)
+        self.goal_pos = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device)
+        self.goal_pos_w = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device)
+
+        # Initialize goal constants from Torch (avoid a one-off kernel launch).
+        default_root_pose = wp.to_torch(self.object.data.default_root_pose).to(self.device)
+        in_hand_pos = default_root_pose[:, 0:3].clone()
+        in_hand_pos[:, 2] -= 0.04
+        self.in_hand_pos.assign(wp.from_torch(in_hand_pos, dtype=wp.vec3f))
+
+        goal_pos = torch.tensor([-0.2, -0.45, 0.68], device=self.device, dtype=torch.float32).repeat((self.num_envs, 1))
+        self.goal_pos.assign(wp.from_torch(goal_pos, dtype=wp.vec3f))
+
+        goal_rot = torch.zeros((self.num_envs, 4), device=self.device, dtype=torch.float32)
+        goal_rot[:, 3] = 1.0  # (x, y, z, w)
+        self.goal_rot.assign(wp.from_torch(goal_rot, dtype=wp.quatf))
+
+        # initialize goal marker
+        self.goal_markers = VisualizationMarkers(self.cfg.goal_object_cfg)
+
+        # Reduction buffers for consecutive_successes update (Warp-only).
+        self._num_resets = wp.zeros(1, dtype=wp.float32, device=self.device)
+        self._finished_cons_successes = wp.zeros(1, dtype=wp.float32, device=self.device)
+        # track successes
+        self.successes = wp.zeros(self.num_envs, dtype=wp.float32, device=self.device)
+        self.consecutive_successes = wp.zeros(1, dtype=wp.float32, device=self.device)
+
+        # Persistent RL buffers (Warp).
+        self.actions = wp.zeros((self.num_envs, self.cfg.action_space), dtype=wp.float32, device=self.device)
+        self.observations = wp.zeros((self.num_envs, self.cfg.observation_space), dtype=wp.float32, device=self.device)
+        self.rewards = wp.zeros((self.num_envs,), dtype=wp.float32, device=self.device)
+        # Flag used as a print token for non-finite observations (Warp).
+        self.obs_nonfinite_flag = wp.zeros(1, dtype=wp.int32, device=self.device)
+
+        # Intermediate values (Warp) -- mirrors the Torch env's `_compute_intermediate_values` fields.
+        self.fingertip_pos = wp.zeros((self.num_envs, self.num_fingertips), dtype=wp.vec3f, device=self.device)
+        self.fingertip_rot = wp.zeros((self.num_envs, self.num_fingertips), dtype=wp.quatf, device=self.device)
+        self.fingertip_velocities = wp.zeros(
+            (self.num_envs, self.num_fingertips), dtype=wp.spatial_vectorf, device=self.device
+        )
+
+        self.object_pose = wp.zeros(self.num_envs, dtype=wp.transformf, device=self.device)
+        self.object_vels = wp.zeros(self.num_envs, dtype=wp.spatial_vectorf, device=self.device)
+
+        # RNG state (per-env) for randomizations in reset/goal resets.
+        self.rng_state = wp.zeros(self.num_envs, dtype=wp.uint32, device=self.device)
+        if self.cfg.seed is None:
+            self.cfg.seed = -1  # no seed configured: -1 is handed to initialize_rng_state (note: mutates cfg)
+        wp.launch(
+            initialize_rng_state,
+            dim=self.num_envs,
+            inputs=[
+                self.cfg.seed,
+                self.rng_state,
+            ],
+            device=self.device,
+        )
+
+        # ---------------------------------------------------------------------
+        # Torch views / aliases
+        # ---------------------------------------------------------------------
+
+        # Bind torch buffers to warp buffers (same pattern as Warp Cartpole).
+        self.torch_obs_buf = wp.to_torch(self.observations)
+        self.torch_reward_buf = wp.to_torch(self.rewards)
+        self.torch_reset_terminated = wp.to_torch(self.reset_terminated)
+        self.torch_reset_time_outs = wp.to_torch(self.reset_time_outs)
+        self.torch_episode_length_buf = self.episode_length_buf  # already a torch tensor via wp.to_torch
+
+    def _setup_scene(self):
+        """Create the hand and object articulations, the ground plane and a dome light for the scene."""
+        self.hand = Articulation(self.cfg.robot_cfg)
+        self.object = Articulation(self.cfg.object_cfg)
+        # add ground plane
+        spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
+        # clone and replicate (no need to filter for this environment)
+        self.scene.clone_environments(copy_from_source=False)
+        # add articulation to scene - we must register to scene to randomize with EventManager
+        self.scene.articulations["robot"] = self.hand
+        self.scene.articulations["object"] = self.object
+        # add lights
+        light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
+        light_cfg.func("/World/Light", light_cfg)
+
+    def _pre_physics_step(self, actions: wp.array) -> None:
+        """Copy the incoming actions into the persistent ``self.actions`` Warp buffer."""
+        wp.copy(self.actions, actions)
+
+    def _apply_action(self) -> None:
+        wp.launch(
+            apply_actions_to_targets,
+            dim=(self.num_envs, self.num_actuated_dofs),
+            inputs=[
+                self.actions,
+                self.hand_dof_lower_limits,
+                self.hand_dof_upper_limits,
+                self.actuated_dof_indices,
+                self.cfg.act_moving_average,
+                self.prev_targets,
+                self.cur_targets,
+            ],
+            device=self.device,
+        )
+
+        # Apply position targets using mask method (CUDA graph safe). Default masks (None = all) are used since all
+        # Allegro joints are actuated. NOTE(review): self.actuated_dof_mask is not passed here; revisit if that changes.
+        self.hand.set_joint_position_target_mask(target=self.cur_targets)
+
+    def _get_observations(self) -> dict:
+        """Fill ``self.observations`` for ``cfg.obs_type`` and return its Torch view under the "policy" key."""
+        # NOTE: asymmetric observations are not handled here. The disabled reference code was guarded by
+        # cfg.asymmetric_obs and read root_physx_view.get_link_incoming_joint_force()[:, self.finger_bodies].
+        # Any obs_type other than "openai" or "full" raises ValueError.
+        if self.cfg.obs_type == "openai":
+            self.compute_reduced_observations()
+        elif self.cfg.obs_type == "full":
+            self.compute_full_observations()
+        else:
+            raise ValueError(f"Unknown obs_type: {self.cfg.obs_type}")
+        return {"policy": self.torch_obs_buf}
+
+    def _get_rewards(self) -> None:
+        """Launch the reward kernels, log ``consecutive_successes`` and launch the goal reset for flagged envs."""
+        # Clear reduction buffers before launching the reward kernel: compute_rewards only accumulates into them
+        # with atomic adds and update_consecutive_successes_from_stats does not reset them.
+        self._num_resets.zero_()
+        self._finished_cons_successes.zero_()
+        wp.launch(
+            compute_rewards,
+            dim=self.num_envs,
+            inputs=[
+                self.reset_buf,
+                self.object_pose,
+                self.in_hand_pos,
+                self.goal_rot,
+                self.cfg.dist_reward_scale,
+                self.cfg.rot_reward_scale,
+                self.cfg.rot_eps,
+                self.actions,
+                self.cfg.action_penalty_scale,
+                self.cfg.success_tolerance,
+                self.cfg.reach_goal_bonus,
+                self.cfg.fall_dist,
+                self.cfg.fall_penalty,
+                self.cfg.action_space,
+                self.reset_goal_buf,
+                self.successes,
+                self._num_resets,
+                self._finished_cons_successes,
+                self.rewards,
+            ],
+            device=self.device,
+        )
+
+        # A separate kernel is needed as Warp does not support thread synchronization for reductions.
+        wp.launch(
+            update_consecutive_successes_from_stats,
+            dim=1,
+            inputs=[
+                self._num_resets,
+                self._finished_cons_successes,
+                self.cfg.av_factor,
+                self.consecutive_successes,
+            ],
+            device=self.device,
+        )
+
+        if "log" not in self.extras:
+            self.extras["log"] = dict()
+        # .mean() cannot be called here as it causes problems on stream
+        self.extras["log"]["consecutive_successes"] = wp.to_torch(self.consecutive_successes)
+
+        # Reset goals for envs that reached the target (mask is `reset_goal_buf`).
+        # This avoids Torch-side index extraction and keeps the step graphable.
+        self._reset_target_pose(mask=self.reset_goal_buf)
+
+    def _get_dones(self) -> None:
+        self._compute_intermediate_values()  # runs first so self.object_pose is recomputed before get_dones reads it
+
+        wp.launch(
+            get_dones,
+            dim=self.num_envs,
+            inputs=[
+                self.max_episode_length,
+                self.object_pose,
+                self.in_hand_pos,
+                self.goal_rot,
+                self.cfg.fall_dist,
+                self.cfg.success_tolerance,
+                self.cfg.max_consecutive_success,
+                self.successes,
+                self._episode_length_buf_wp,
+                self.reset_terminated,
+                self.reset_time_outs,
+                self.reset_buf,
+            ],
+            device=self.device,
+        )
+
+    def _reset_idx(self, mask: wp.array | None = None):
+        if mask is None:
+            mask = self._ALL_ENV_MASK  # no mask given: fall back to the all-environments mask
+
+        # resets articulation and rigid body attributes
+        super()._reset_idx(mask)
+
+        # reset goals
+        self._reset_target_pose(mask=mask)
+
+        # reset object
+        wp.launch(
+            reset_object,
+            dim=self.num_envs,
+            inputs=[
+                self.object.data.default_root_pose,
+                self.env_origins,
+                self.cfg.reset_position_noise,
+                self.x_unit_vec,
+                self.y_unit_vec,
+                mask,
+                self.rng_state,
+                self.object.data.root_link_pose_w,
+                self.object.data.root_com_vel_w,
+            ],
+            device=self.device,
+        )
+
+        # reset hand
+        wp.launch(
+            reset_hand,
+            dim=self.num_envs,
+            inputs=[
+                self.hand.data.default_joint_pos,
+                self.hand.data.default_joint_vel,
+                self.hand_dof_lower_limits,
+                self.hand_dof_upper_limits,
+                self.cfg.reset_dof_pos_noise,
+                self.cfg.reset_dof_vel_noise,
+                mask,
+                self.num_hand_dofs,
+                self.rng_state,
+                self.hand.data.joint_pos,
+                self.hand.data.joint_vel,
+                self.prev_targets,
+                self.cur_targets,
+                self.hand_dof_targets,
+            ],
+            device=self.device,
+        )
+
+        self.hand.set_joint_position_target_mask(target=self.cur_targets, env_mask=mask)
+
+        wp.launch(
+            reset_successes,
+            dim=self.num_envs,
+            inputs=[
+                mask,
+                self.successes,
+            ],
+            device=self.device,
+        )
+
+        self._compute_intermediate_values()
+
+    def _reset_target_pose(self, env_ids: Sequence[int] | None = None, mask: wp.array | None = None):
+        """Launch ``reset_target_pose`` with ``mask`` (built from ``env_ids`` if omitted); no-op when both are None."""
+        if mask is None:
+            if env_ids is None:
+                return
+            env_mask_list = [False] * self.num_envs
+            for env_id in env_ids:
+                env_mask_list[int(env_id)] = True
+            mask = wp.array(env_mask_list, dtype=wp.bool, device=self.device)
+
+        # update goal pose and markers
+        wp.launch(
+            reset_target_pose,
+            dim=self.num_envs,
+            inputs=[
+                mask,
+                self.x_unit_vec,
+                self.y_unit_vec,
+                self.env_origins,
+                self.goal_pos,
+                self.rng_state,
+                self.goal_rot,
+                self.reset_goal_buf,
+                self.goal_pos_w,
+            ],
+            device=self.device,
+        )
+
+    def _post_step_visualize(self) -> None:
+        """Update goal markers outside CUDA graph scope."""
+        self.goal_markers.visualize(wp.to_torch(self.goal_pos_w), wp.to_torch(self.goal_rot))
+
+    def _compute_intermediate_values(self):
+        """Launch ``compute_intermediate_values`` for all envs (Warp version of the Torch env's method of that name)."""
+        wp.launch(
+            compute_intermediate_values,
+            dim=self.num_envs,
+            inputs=[
+                self.hand.data.body_pos_w,
+                self.hand.data.body_quat_w,
+                self.hand.data.body_vel_w,
+                self.finger_bodies,
+                self.env_origins,
+                self.object.data.root_link_pose_w,
+                self.object.data.root_com_vel_w,
+                self.num_fingertips,
+                self.fingertip_pos,
+                self.fingertip_rot,
+                self.fingertip_velocities,
+                self.object_pose,
+                self.object_vels,
+            ],
+            device=self.device,
+        )
+
+    def compute_reduced_observations(self):
+        """Launch the reduced-observation kernel, then zero non-finite entries of ``self.observations``.
+
+        Per https://arxiv.org/pdf/1808.00177.pdf Table 2: fingertip positions, object position (but not
+        orientation) and relative target orientation."""
+        wp.launch(
+            compute_reduced_observations,
+            dim=self.num_envs,
+            inputs=[
+                self.fingertip_pos,
+                self.object_pose,
+                self.goal_rot,
+                self.actions,
+                self.num_fingertips,
+                self.cfg.action_space,
+                self.observations,
+            ],
+            device=self.device,
+        )
+        # Warp-native non-finite sanitization + print-once.
+        wp.launch(
+            sanitize_and_print_once,
+            dim=(self.num_envs * self.cfg.observation_space),
+            inputs=[self.observations.flatten(), self.obs_nonfinite_flag],
+            device=self.device,
+        )
+        self.obs_nonfinite_flag.zero_()  # re-arm the print token so the next call can report again
+
+    def compute_full_observations(self):
+        wp.launch(
+            compute_full_observations,
+            dim=self.num_envs,
+            inputs=[
+                self.hand.data.joint_pos,
+                self.hand.data.joint_vel,
+                self.hand_dof_lower_limits,
+                self.hand_dof_upper_limits,
+                self.cfg.vel_obs_scale,
+                self.object_pose,
+                self.object_vels,
+                self.in_hand_pos,
+                self.goal_rot,
+                self.fingertip_pos,
+                self.fingertip_rot,
+                self.fingertip_velocities,
+                self.actions,
+                self.num_hand_dofs,
+                self.num_fingertips,
+                self.cfg.action_space,
+                self.observations,
+            ],
+            device=self.device,
+        )
+        # Warp-native non-finite sanitization + print-once.
+        wp.launch(
+            sanitize_and_print_once,
+            dim=(self.num_envs * self.cfg.observation_space),
+            inputs=[self.observations.flatten(), self.obs_nonfinite_flag],
+            device=self.device,
+        )
+        self.obs_nonfinite_flag.zero_()  # re-arm the print token so the next call can report again
diff --git a/source/isaaclab_tasks_experimental/pyproject.toml b/source/isaaclab_tasks_experimental/pyproject.toml
new file mode 100644
index 000000000000..d90ac3536f16
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools", "wheel", "toml"]
+build-backend = "setuptools.build_meta"
diff --git a/source/isaaclab_tasks_experimental/setup.py b/source/isaaclab_tasks_experimental/setup.py
new file mode 100644
index 000000000000..ae77016f8984
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/setup.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Installation script for the 'isaaclab_tasks_experimental' python package."""
+
+import os
+
+import toml
+from setuptools import find_packages, setup
+
+# Directory that contains this setup.py
+EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__))
+# Package metadata (repository, version, description, keywords) read from config/extension.toml
+EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml"))
+
+# Installation operation
+setup(
+    name="isaaclab_tasks_experimental",
+    author="Isaac Lab Project Developers",
+    maintainer="Isaac Lab Project Developers",
+    url=EXTENSION_TOML_DATA["package"]["repository"],
+    version=EXTENSION_TOML_DATA["package"]["version"],
+    description=EXTENSION_TOML_DATA["package"]["description"],
+    keywords=EXTENSION_TOML_DATA["package"]["keywords"],
+    include_package_data=True,
+    python_requires=">=3.10",  # NOTE(review): the classifiers below list only 3.11 and 3.12 - confirm 3.10
+    install_requires=["isaaclab_tasks"],
+    packages=find_packages(),
+    classifiers=[
+        "Natural Language :: English",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
+        "Isaac Sim :: 5.0.0",
+        "Isaac Sim :: 5.1.0",
+        "Isaac Sim :: 6.0.0",
+    ],
+    zip_safe=False,
+)