From c5259f7a4e98cb9266b3a884261eacfe9cc5ee69 Mon Sep 17 00:00:00 2001
From: Jichuan Hu <leo.hu.sh@gmail.com>
Date: Fri, 27 Feb 2026 01:50:52 -0800
Subject: [PATCH 1/2] Add experimental warp envs and infrastructure from
 dev/newton

Add isaaclab_experimental package with DirectRLEnvWarp base class,
InteractiveSceneWarp, and WarpGraphCache utility.

Add direct warp environments in isaaclab_tasks_experimental:
- Cartpole, Ant, Humanoid, Locomotion (base), InHand Manipulation,
  Allegro Hand, with agent configs for rsl_rl, rl_games, skrl, sb3.

Adapt to develop base class API:
- find_joints 2-value return (indices, names)
- episode_length_buf as property with in-place copy for warp sync
- _ALL_ENV_MASK on base env instead of articulation
- set_joint_effort_target_mask for CUDA graph compatibility
- _get_observations returns dict for rsl_rl wrapper

Align solver configs with stable develop PresetCfg values.
Add safe_normalize to guard against NaN from wp.normalize.
Fix reset_root to_targets computation to use actual targets.
Fix cartpole reset kernel to use parameterized joint indices.
Clean up extension.toml and setup.py dependencies.
Switch timer import to isaaclab.utils.timer.
---
 CONTRIBUTORS.md                               |   1 +
 .../reinforcement_learning/rl_games/play.py   |   3 +
 .../reinforcement_learning/rl_games/train.py  |   3 +
 scripts/reinforcement_learning/rsl_rl/play.py |   3 +
 .../reinforcement_learning/rsl_rl/train.py    |   3 +
 scripts/reinforcement_learning/sb3/play.py    |   3 +
 scripts/reinforcement_learning/sb3/train.py   |   2 +
 scripts/reinforcement_learning/skrl/play.py   |   3 +
 scripts/reinforcement_learning/skrl/train.py  |   3 +
 .../config/extension.toml                     |  21 +
 .../isaaclab_experimental/__init__.py         |  20 +
 .../isaaclab_experimental/envs/__init__.py    |  51 +
 .../envs/direct_rl_env_warp.py                | 832 +++++++++++++++
 .../envs/interactive_scene_warp.py            |  44 +
 .../envs/utils/__init__.py                    |   6 +
 .../utils/warp_graph_cache.py                 |  73 ++
 source/isaaclab_experimental/pyproject.toml   |   3 +
 source/isaaclab_experimental/setup.py         |  44 +
 .../isaaclab_rl/rsl_rl/vecenv_wrapper.py      |  11 +-
 .../config/extension.toml                     |  22 +
 .../docs/README.md                            |   3 +
 .../isaaclab_tasks_experimental/__init__.py   |  32 +
 .../direct/__init__.py                        |  10 +
 .../direct/allegro_hand/__init__.py           |  29 +
 .../allegro_hand/allegro_hand_warp_env_cfg.py | 136 +++
 .../direct/ant/__init__.py                    |  28 +
 .../direct/ant/ant_env_warp.py                |  91 ++
 .../direct/cartpole/__init__.py               |  29 +
 .../direct/cartpole/cartpole_warp_env.py      | 357 +++++++
 .../direct/humanoid/__init__.py               |  28 +
 .../direct/humanoid/humanoid_warp_env.py      | 114 ++
 .../direct/inhand_manipulation/__init__.py    |   4 +
 .../inhand_manipulation_warp_env.py           | 980 ++++++++++++++++++
 .../direct/locomotion/__init__.py             |   4 +
 .../direct/locomotion/locomotion_env_warp.py  | 575 ++++++++++
 .../pyproject.toml                            |   3 +
 source/isaaclab_tasks_experimental/setup.py   |  40 +
 37 files changed, 3613 insertions(+), 1 deletion(-)
 create mode 100644 source/isaaclab_experimental/config/extension.toml
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/__init__.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py
 create mode 100644 source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py
 create mode 100644 source/isaaclab_experimental/pyproject.toml
 create mode 100644 source/isaaclab_experimental/setup.py
 create mode 100644 source/isaaclab_tasks_experimental/config/extension.toml
 create mode 100644 source/isaaclab_tasks_experimental/docs/README.md
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/ant_env_warp.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/cartpole_warp_env.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/humanoid_warp_env.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/__init__.py
 create mode 100644 source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/locomotion_env_warp.py
 create mode 100644 source/isaaclab_tasks_experimental/pyproject.toml
 create mode 100644 source/isaaclab_tasks_experimental/setup.py

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 201c70bf727e..c67997cf40be 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -93,6 +93,7 @@ Guidelines for modifications:
 * Ji Yuan Feng
 * Jia Lin Yuan
 * Jiakai Zhang
+* Jichuan Hu
 * Jinghuan Shang
 * Jingzhou Liu
 * Jinqi Wei
diff --git a/scripts/reinforcement_learning/rl_games/play.py b/scripts/reinforcement_learning/rl_games/play.py
index eb2390af90d6..dd61d80da530 100644
--- a/scripts/reinforcement_learning/rl_games/play.py
+++ b/scripts/reinforcement_learning/rl_games/play.py
@@ -6,6 +6,7 @@
 """Script to play a checkpoint if an RL agent from RL-Games."""
 
 import argparse
+import contextlib
 import math
 import os
 import random
@@ -29,6 +30,8 @@
 from isaaclab_tasks.utils import add_launcher_args, get_checkpoint_path, launch_simulation, resolve_task_config
 
 # PLACEHOLDER: Extension template (do not remove this comment)
+with contextlib.suppress(ImportError):
+    import isaaclab_tasks_experimental  # noqa: F401
 
 # -- argparse ----------------------------------------------------------------
 parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from RL-Games.")
diff --git a/scripts/reinforcement_learning/rl_games/train.py b/scripts/reinforcement_learning/rl_games/train.py
index 5ad13b401bb3..4de23c19246a 100644
--- a/scripts/reinforcement_learning/rl_games/train.py
+++ b/scripts/reinforcement_learning/rl_games/train.py
@@ -6,6 +6,7 @@
 """Script to train RL agent with RL-Games."""
 
 import argparse
+import contextlib
 import logging
 import math
 import os
@@ -33,6 +34,8 @@
 logger = logging.getLogger(__name__)
 
 # PLACEHOLDER: Extension template (do not remove this comment)
+with contextlib.suppress(ImportError):
+    import isaaclab_tasks_experimental  # noqa: F401
 
 # -- argparse ----------------------------------------------------------------
 parser = argparse.ArgumentParser(description="Train an RL agent with RL-Games.")
diff --git a/scripts/reinforcement_learning/rsl_rl/play.py b/scripts/reinforcement_learning/rsl_rl/play.py
index f790f627a22a..cb60206bdf99 100644
--- a/scripts/reinforcement_learning/rsl_rl/play.py
+++ b/scripts/reinforcement_learning/rsl_rl/play.py
@@ -6,6 +6,7 @@
 """Script to play a checkpoint if an RL agent from RSL-RL."""
 
 import argparse
+import contextlib
 import importlib.metadata as metadata
 import os
 import sys
@@ -37,6 +38,8 @@
 import cli_args  # isort: skip
 
 # PLACEHOLDER: Extension template (do not remove this comment)
+with contextlib.suppress(ImportError):
+    import isaaclab_tasks_experimental  # noqa: F401
 
 # -- argparse ----------------------------------------------------------------
 parser = argparse.ArgumentParser(description="Train an RL agent with RSL-RL.")
diff --git a/scripts/reinforcement_learning/rsl_rl/train.py b/scripts/reinforcement_learning/rsl_rl/train.py
index 7ca2d3156da5..9c4c95da2309 100644
--- a/scripts/reinforcement_learning/rsl_rl/train.py
+++ b/scripts/reinforcement_learning/rsl_rl/train.py
@@ -6,6 +6,7 @@
 """Script to train RL agent with RSL-RL."""
 
 import argparse
+import contextlib
 import importlib.metadata as metadata
 import logging
 import os
@@ -35,6 +36,8 @@
 logger = logging.getLogger(__name__)
 
 # PLACEHOLDER: Extension template (do not remove this comment)
+with contextlib.suppress(ImportError):
+    import isaaclab_tasks_experimental  # noqa: F401
 
 RSL_RL_VERSION = "3.0.1"
 
diff --git a/scripts/reinforcement_learning/sb3/play.py b/scripts/reinforcement_learning/sb3/play.py
index a6f222d346c1..a1f8757a1e8c 100644
--- a/scripts/reinforcement_learning/sb3/play.py
+++ b/scripts/reinforcement_learning/sb3/play.py
@@ -6,6 +6,7 @@
 """Script to play a checkpoint if an RL agent from Stable-Baselines3."""
 
 import argparse
+import contextlib
 import os
 import random
 import sys
@@ -27,6 +28,8 @@
 from isaaclab_tasks.utils import add_launcher_args, get_checkpoint_path, launch_simulation, resolve_task_config
 
 # PLACEHOLDER: Extension template (do not remove this comment)
+with contextlib.suppress(ImportError):
+    import isaaclab_tasks_experimental  # noqa: F401
 
 # -- argparse ----------------------------------------------------------------
 parser = argparse.ArgumentParser(description="Play a checkpoint of an RL agent from Stable-Baselines3.")
diff --git a/scripts/reinforcement_learning/sb3/train.py b/scripts/reinforcement_learning/sb3/train.py
index bd79599d1fda..7bf757ef5483 100644
--- a/scripts/reinforcement_learning/sb3/train.py
+++ b/scripts/reinforcement_learning/sb3/train.py
@@ -35,6 +35,8 @@
 logger = logging.getLogger(__name__)
 
 # PLACEHOLDER: Extension template (do not remove this comment)
+with contextlib.suppress(ImportError):
+    import isaaclab_tasks_experimental  # noqa: F401
 
 # -- argparse ----------------------------------------------------------------
 parser = argparse.ArgumentParser(description="Train an RL agent with Stable-Baselines3.")
diff --git a/scripts/reinforcement_learning/skrl/play.py b/scripts/reinforcement_learning/skrl/play.py
index 0349d405967e..3857c095150b 100644
--- a/scripts/reinforcement_learning/skrl/play.py
+++ b/scripts/reinforcement_learning/skrl/play.py
@@ -11,6 +11,7 @@
 """
 
 import argparse
+import contextlib
 import os
 import random
 import sys
@@ -30,6 +31,8 @@
 from isaaclab_tasks.utils import add_launcher_args, get_checkpoint_path, launch_simulation, resolve_task_config
 
 # PLACEHOLDER: Extension template (do not remove this comment)
+with contextlib.suppress(ImportError):
+    import isaaclab_tasks_experimental  # noqa: F401
 
 SKRL_VERSION = "1.4.3"
 
diff --git a/scripts/reinforcement_learning/skrl/train.py b/scripts/reinforcement_learning/skrl/train.py
index 750ebeb8798d..edb124381647 100644
--- a/scripts/reinforcement_learning/skrl/train.py
+++ b/scripts/reinforcement_learning/skrl/train.py
@@ -11,6 +11,7 @@
 """
 
 import argparse
+import contextlib
 import logging
 import os
 import random
@@ -35,6 +36,8 @@
 logger = logging.getLogger(__name__)
 
 # PLACEHOLDER: Extension template (do not remove this comment)
+with contextlib.suppress(ImportError):
+    import isaaclab_tasks_experimental  # noqa: F401
 
 SKRL_VERSION = "1.4.3"
 
diff --git a/source/isaaclab_experimental/config/extension.toml b/source/isaaclab_experimental/config/extension.toml
new file mode 100644
index 000000000000..c1154edbe439
--- /dev/null
+++ b/source/isaaclab_experimental/config/extension.toml
@@ -0,0 +1,21 @@
+[package]
+
+# Note: Semantic Versioning is used: https://semver.org/
+version = "0.0.1"
+
+# Description
+title = "Experimental playground for upcoming IsaacLab features"
+description="Provides early access to future features that are not yet `production-ready`."
+readme  = "docs/README.md"
+repository = "https://github.com/isaac-sim/IsaacLab"
+category = "robotics"
+keywords = ["robotics", "simulation", "experimental"]
+
+[dependencies]
+"isaaclab" = {}
+
+[core]
+reloadable = false
+
+[[python.module]]
+name = "isaaclab_experimental"
diff --git a/source/isaaclab_experimental/isaaclab_experimental/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/__init__.py
new file mode 100644
index 000000000000..18e97e2dd188
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/__init__.py
@@ -0,0 +1,20 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Package containing the core framework."""
+
+import os
+import toml
+from enum import IntEnum
+
+# Conveniences to other module directories via relative paths
+ISAACLAB_EXPERIMENTAL_EXT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
+"""Path to the extension source directory."""
+
+ISAACLAB_EXPERIMENTAL_METADATA = toml.load(os.path.join(ISAACLAB_EXPERIMENTAL_EXT_DIR, "config", "extension.toml"))
+"""Extension metadata dictionary parsed from the extension.toml file."""
+
+# Configure the module-level variables
+__version__ = ISAACLAB_EXPERIMENTAL_METADATA["package"]["version"]
diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py
new file mode 100644
index 000000000000..81c59dda7d51
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/envs/__init__.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Sub-package for environment definitions.
+
+Environments define the interface between the agent and the simulation.
+In the simplest case, the environment provides the agent with the current
+observations and executes the actions provided by the agent. However, the
+environment can also provide additional information such as the current
+reward, done flag, and information about the current episode.
+
+There are two types of environment designing workflows:
+
+* **Manager-based**: The environment is decomposed into individual components (or managers)
+  for different aspects (such as computing observations, applying actions, and applying
+  randomization. The users mainly configure the managers and the environment coordinates the
+  managers and calls their functions.
+* **Direct**: The user implements all the necessary functionality directly into a single class
+  directly without the need for additional managers.
+
+Based on these workflows, there are the following environment classes for single and multi-agent RL:
+
+**Single-Agent RL:**
+
+* :class:`ManagerBasedEnv`: The manager-based workflow base environment which only provides the
+  agent with the current observations and executes the actions provided by the agent.
+* :class:`ManagerBasedRLEnv`: The manager-based workflow RL task environment which besides the
+  functionality of the base environment also provides additional Markov Decision Process (MDP)
+  related information such as the current reward, done flag, and information.
+* :class:`DirectRLEnv`: The direct workflow RL task environment which provides implementations for
+  implementing scene setup, computing dones, performing resets, and computing reward and observation.
+
+**Multi-Agent RL (MARL):**
+
+* :class:`DirectMARLEnv`: The direct workflow MARL task environment which provides implementations for
+  implementing scene setup, computing dones, performing resets, and computing reward and observation.
+
+For more information about the workflow design patterns, see the `Task Design Workflows`_ section.
+
+.. _`Task Design Workflows`: https://isaac-sim.github.io/IsaacLab/source/features/task_workflows.html
+"""
+
+from .direct_rl_env_warp import DirectRLEnvWarp  # noqa: F401
+from .interactive_scene_warp import InteractiveSceneWarp  # noqa: F401
+
+__all__ = [
+    "DirectRLEnvWarp",
+    "InteractiveSceneWarp",
+]
diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py b/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py
new file mode 100644
index 000000000000..9bb0b7ed3718
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/envs/direct_rl_env_warp.py
@@ -0,0 +1,832 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+import contextlib
+import inspect
+import logging
+import math
+import os
+import weakref
+from abc import abstractmethod
+from dataclasses import MISSING
+from typing import Any, ClassVar
+
+import gymnasium as gym
+import numpy as np
+import torch
+
+# import omni.kit.app
+# import omni.log
+# import omni.physx
+import warp as wp
+
+from isaaclab.envs.common import VecEnvObs, VecEnvStepReturn
+from isaaclab.envs.direct_rl_env_cfg import DirectRLEnvCfg
+from isaaclab.envs.utils.spaces import sample_space, spec_to_gym_space
+
+# from isaaclab.envs.ui import ViewportCameraController
+from isaaclab.managers import EventManager
+from isaaclab.sim import SimulationContext
+from isaaclab.sim.utils import use_stage
+from isaaclab.utils.noise import NoiseModel
+from isaaclab.utils.seed import configure_seed
+from isaaclab.utils.timer import Timer
+
+from isaaclab_experimental.envs.interactive_scene_warp import InteractiveSceneWarp
+from isaaclab_experimental.utils.warp_graph_cache import WarpGraphCache
+
+# from isaacsim.core.simulation_manager import SimulationManager
+# from isaacsim.core.version import get_version
+
+
+# import logger
+logger = logging.getLogger(__name__)
+
+DEBUG_TIMER_STEP = os.environ.get("DEBUG_TIMER_STEP", "0") == "1"
+"""Enable outer step() timer only. Set DEBUG_TIMER_STEP=1 env var to enable."""
+
+DEBUG_TIMERS = os.environ.get("DEBUG_TIMERS", "0") == "1"
+"""Enable all fine-grained inner timers (adds wp.synchronize per sub-phase). Set DEBUG_TIMERS=1 env var to enable."""
+
+
+@wp.kernel
+def zero_mask_int32(
+    mask: wp.array(dtype=wp.bool),
+    data: wp.array(dtype=wp.int32),
+):
+    env_index = wp.tid()
+    if mask[env_index]:
+        data[env_index] = 0
+
+
+@wp.kernel
+def add_to_env(
+    data: wp.array(dtype=wp.int32),
+    value: wp.int32,
+):
+    env_index = wp.tid()
+    data[env_index] += value
+
+
+class DirectRLEnvWarp(gym.Env):
+    """The superclass for the direct workflow to design environments.
+
+    This class implements the core functionality for reinforcement learning (RL)
+    environments. It is designed to be used with any RL library. The class is designed
+    to be used with vectorized environments, i.e., the environment is expected to be run
+    in parallel with multiple sub-environments.
+
+    While the environment itself is implemented as a vectorized environment, we do not
+    inherit from :class:`gym.vector.VectorEnv`. This is mainly because the class adds
+    various methods (for wait and asynchronous updates) which are not required.
+    Additionally, each RL library typically has its own definition for a vectorized
+    environment. Thus, to reduce complexity, we directly use the :class:`gym.Env` over
+    here and leave it up to library-defined wrappers to take care of wrapping this
+    environment for their agents.
+
+    Note:
+        For vectorized environments, it is recommended to **only** call the :meth:`reset`
+        method once before the first call to :meth:`step`, i.e. after the environment is created.
+        After that, the :meth:`step` function handles the reset of terminated sub-environments.
+        in a vectorized environment.
+
+    """
+
+    is_vector_env: ClassVar[bool] = True
+    """Whether the environment is a vectorized environment."""
+    metadata: ClassVar[dict[str, Any]] = {
+        "render_modes": [None, "human", "rgb_array"],
+        # "isaac_sim_version": get_version(),
+    }
+    """Metadata for the environment."""
+
+    def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs):
+        """Initialize the environment.
+
+        Args:
+            cfg: The configuration object for the environment.
+            render_mode: The render mode for the environment. Defaults to None, which
+                is similar to ``"human"``.
+
+        Raises:
+            RuntimeError: If a simulation context already exists. The environment must always create one
+                since it configures the simulation context and controls the simulation.
+        """
+        # check that the config is valid
+        cfg.validate()
+        # store inputs to class
+        self.cfg = cfg
+        # store the render mode
+        self.render_mode = render_mode
+        # initialize internal variables
+        self._is_closed = False
+
+        # set the seed for the environment
+        if self.cfg.seed is not None:
+            self.cfg.seed = self.seed(self.cfg.seed)
+        else:
+            logger.warning("Seed not set for the environment. The environment creation may not be deterministic.")
+
+        # create a simulation context to control the simulator
+        if SimulationContext.instance() is None:
+            self.sim: SimulationContext = SimulationContext(self.cfg.sim)
+        else:
+            raise RuntimeError("Simulation context already exists. Cannot create a new one.")
+
+        # make sure torch is running on the correct device
+        if "cuda" in self.device:
+            torch.cuda.set_device(self.device)
+
+        # print useful information
+        print("[INFO]: Base environment:")
+        print(f"\tEnvironment device    : {self.device}")
+        print(f"\tEnvironment seed      : {self.cfg.seed}")
+        print(f"\tPhysics step-size     : {self.physics_dt}")
+        print(f"\tRendering step-size   : {self.physics_dt * self.cfg.sim.render_interval}")
+        print(f"\tEnvironment step-size : {self.step_dt}")
+
+        if self.cfg.sim.render_interval < self.cfg.decimation:
+            msg = (
+                f"The render interval ({self.cfg.sim.render_interval}) is smaller than the decimation "
+                f"({self.cfg.decimation}). Multiple render calls will happen for each environment step."
+                "If this is not intended, set the render interval to be equal to the decimation."
+            )
+            logger.warning(msg)
+
+        # generate scene
+        with Timer("[INFO]: Time taken for scene creation", "scene_creation"):
+            # set the stage context for scene creation steps which use the stage
+            with use_stage(self.sim.stage):
+                self.scene = InteractiveSceneWarp(self.cfg.scene)
+                self._setup_scene()
+                # attach_stage_to_usd_context()
+        print("[INFO]: Scene manager: ", self.scene)
+
+        # set up camera viewport controller
+        # viewport is not available in other rendering modes so the function will throw a warning
+        # FIXME: This needs to be fixed in the future when we unify the UI functionalities even for
+        # non-rendering modes.
+        has_gui = bool(self.sim.get_setting("/isaaclab/has_gui"))
+        offscreen_render = bool(self.sim.get_setting("/isaaclab/render/offscreen"))
+        if has_gui or offscreen_render:
+            # self.viewport_camera_controller = ViewportCameraController(self, self.cfg.viewer)
+            self.viewport_camera_controller = None
+        else:
+            self.viewport_camera_controller = None
+
+        # create event manager
+        # note: this is needed here (rather than after simulation play) to allow USD-related randomization events
+        #   that must happen before the simulation starts. Example: randomizing mesh scale
+        if self.cfg.events:
+            self.event_manager = EventManager(self.cfg.events, self)
+
+            # apply USD-related randomization events
+            if "prestartup" in self.event_manager.available_modes:
+                self.event_manager.apply(mode="prestartup")
+
+        # play the simulator to activate physics handles
+        # note: this activates the physics simulation view that exposes TensorAPIs
+        # note: when started in extension mode, first call sim.reset_async() and then initialize the managers
+        # if builtins.ISAAC_LAUNCHED_FROM_TERMINAL is False:
+        #     print("[INFO]: Starting the simulation. This may take a few seconds. Please wait...")
+        with Timer("[INFO]: Time taken for simulation start", "simulation_start"):
+            # since the reset can trigger callbacks which use the stage,
+            # we need to set the stage context here
+            with use_stage(self.sim.stage):
+                self.sim.reset()
+            # update scene to pre populate data buffers for assets and sensors.
+            # this is needed for the observation manager to get valid tensors for initialization.
+            # this shouldn't cause an issue since later on, users do a reset over all the
+            # environments so the lazy buffers would be reset.
+            self.scene.update(dt=self.physics_dt)
+
+        # check if debug visualization is has been implemented by the environment
+        source_code = inspect.getsource(self._set_debug_vis_impl)
+        self.has_debug_vis_implementation = "NotImplementedError" not in source_code
+        self._debug_vis_handle = None
+
+        # extend UI elements
+        # we need to do this here after all the managers are initialized
+        # this is because they dictate the sensors and commands right now
+        if bool(self.sim.settings.get("/isaaclab/visualizer")) and self.cfg.ui_window_class_type is not None:
+            self._window = self.cfg.ui_window_class_type(self, window_name="IsaacLab")
+        else:
+            # if no window, then we don't need to store the window
+            self._window = None
+
+        # allocate dictionary to store metrics
+        self.extras = {}
+
+        # initialize data and constants
+        # -- counter for simulation steps
+        self._sim_step_counter = 0
+        # -- counter for curriculum
+        self.common_step_counter = 0
+        # -- init buffers
+        self._episode_length_buf_wp = wp.zeros(self.num_envs, dtype=wp.int32, device=self.device)
+        self._episode_length_buf_torch = wp.to_torch(self._episode_length_buf_wp)
+        self.reset_terminated = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device)
+        self.reset_time_outs = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device)
+        self.reset_buf = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device)
+        self._ALL_ENV_MASK = wp.ones(self.num_envs, dtype=wp.bool, device=self.device)
+
+        # Expected bindings:
+        self.torch_obs_buf: torch.Tensor = None
+        self.torch_reward_buf: torch.Tensor = None
+        self.torch_reset_terminated: torch.Tensor = None
+        self.torch_reset_time_outs: torch.Tensor = None
+        self.torch_episode_length_buf: torch.Tensor = None
+
+        # Warp CUDA graph cache for capture-or-replay
+        self._graph_cache = WarpGraphCache()
+
+        # setup the action and observation spaces for Gym
+        self._configure_gym_env_spaces()
+
+        # setup noise cfg for adding action and observation noise
+        if self.cfg.action_noise_model:
+            self._action_noise_model: NoiseModel = self.cfg.action_noise_model.class_type(
+                self.cfg.action_noise_model, num_envs=self.num_envs, device=self.device
+            )
+        if self.cfg.observation_noise_model:
+            self._observation_noise_model: NoiseModel = self.cfg.observation_noise_model.class_type(
+                self.cfg.observation_noise_model, num_envs=self.num_envs, device=self.device
+            )
+
+        # perform events at the start of the simulation
+        if self.cfg.events:
+            # we print it here to make the logging consistent
+            print("[INFO] Event Manager: ", self.event_manager)
+
+            if "startup" in self.event_manager.available_modes:
+                self.event_manager.apply(mode="startup")
+
+        # set the framerate of the gym video recorder wrapper so that the playback speed of the produced
+        # video matches the simulation
+        self.metadata["render_fps"] = 1 / self.step_dt
+
+        # print the environment information
+        print("[INFO]: Completed setting up the environment...")
+
+    def __del__(self):
+        """Cleanup for the environment."""
+        # Suppress errors during Python shutdown to avoid noisy tracebacks
+        # Note: contextlib may be None during interpreter shutdown
+        if contextlib is not None:
+            with contextlib.suppress(ImportError, AttributeError, TypeError):
+                self.close()
+
+    """
+    Properties.
+    """
+
+    @property
+    def num_envs(self) -> int:
+        """The number of instances of the environment that are running."""
+        return self.scene.num_envs
+
+    @property
+    def physics_dt(self) -> float:
+        """The physics time-step (in s).
+
+        This is the lowest time-decimation at which the simulation is happening.
+        """
+        return self.cfg.sim.dt
+
+    @property
+    def step_dt(self) -> float:
+        """The environment stepping time-step (in s).
+
+        This is the time-step at which the environment steps forward.
+        """
+        return self.cfg.sim.dt * self.cfg.decimation
+
+    @property
+    def device(self):
+        """The device on which the environment is running."""
+        return self.sim.device
+
+    @property
+    def max_episode_length_s(self) -> float:
+        """Maximum episode length in seconds."""
+        return self.cfg.episode_length_s
+
+    @property
+    def max_episode_length(self):
+        """The maximum episode length in steps adjusted from s."""
+        return math.ceil(self.max_episode_length_s / (self.cfg.sim.dt * self.cfg.decimation))
+
+    @property
+    def episode_length_buf(self) -> torch.Tensor:
+        """The episode length buffer as a torch tensor.
+
+        This is a view of the underlying warp array ``_episode_length_buf_wp``.
+        Setting this property copies values in-place to preserve the shared
+        memory link with warp kernels.
+        """
+        return self._episode_length_buf_torch
+
+    @episode_length_buf.setter
+    def episode_length_buf(self, value: torch.Tensor):
+        self._episode_length_buf_torch.copy_(value)
+
+    """
+    Operations.
+    """
+
+    def reset(self, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[VecEnvObs, dict]:
+        """Resets all the environments and returns observations.
+
+        This function calls the :meth:`_reset_idx` function to reset all the environments.
+        However, certain operations, such as procedural terrain generation, that happened during initialization
+        are not repeated.
+
+        Args:
+            seed: The seed to use for randomization. Defaults to None, in which case the seed is not set.
+            options: Additional information to specify how the environment is reset. Defaults to None.
+
+                Note:
+                    This argument is used for compatibility with Gymnasium environment definition.
+
+        Returns:
+            A tuple containing the observations and extras.
+        """
+        # set the seed
+        if seed is not None:
+            self.seed(seed)
+
+        # reset state of scene
+        self._reset_idx(self._ALL_ENV_MASK)
+
+        # update articulation kinematics
+        self.scene.write_data_to_sim()
+
+        # if sensors are added to the scene, make sure we render to reflect changes in reset
+        if hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset:
+            self.sim.render()
+
+        # if self.cfg.wait_for_textures and self.sim.has_rtx_sensors():
+        #     while SimulationManager.assets_loading():
+        #         self.sim.render()
+
+        # return observations
+        self._get_observations()
+        return {"policy": self.torch_obs_buf.clone()}, self.extras
+
+    @Timer(name="env_step", msg="Step took:", enable=DEBUG_TIMER_STEP or DEBUG_TIMERS)
+    def step(self, action: torch.Tensor) -> VecEnvStepReturn:
+        """Execute one time-step of the environment's dynamics.
+
+        The environment steps forward at a fixed time-step, while the physics simulation is decimated at a
+        lower time-step. This is to ensure that the simulation is stable. These two time-steps can be configured
+        independently using the :attr:`DirectRLEnvCfg.decimation` (number of simulation steps per environment step)
+        and the :attr:`DirectRLEnvCfg.sim.physics_dt` (physics time-step). Based on these parameters, the environment
+        time-step is computed as the product of the two.
+
+        This function performs the following steps:
+
+        1. Pre-process the actions before stepping through the physics.
+        2. Apply the actions to the simulator and step through the physics in a decimated manner.
+        3. Compute the reward and done signals.
+        4. Reset environments that have terminated or reached the maximum episode length.
+        5. Apply interval events if they are enabled.
+        6. Compute observations.
+
+        Args:
+            action: The actions to apply on the environment. Shape is (num_envs, action_dim).
+
+        Returns:
+            A tuple containing the observations, rewards, resets (terminated and truncated) and extras.
+        """
+
+        action = action.to(self.device)
+        # add action noise
+        if self.cfg.action_noise_model:
+            action = self._action_noise_model(action)
+
+        # process actions, #TODO pass the torch tensor directly.
+        with Timer(name="pre_physics", msg="Pre-physics step took:", enable=DEBUG_TIMERS):
+            self._pre_physics_step(
+                wp.from_torch(action)
+            )  # Creates a tensor and discards it. Not graphable unless training loop reuses the same pointer.
+
+        # check if we need to do rendering within the physics loop
+        # note: checked here once to avoid multiple checks within the loop
+        _has_rtx = hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors()
+        is_rendering = bool(self.sim.settings.get("/isaaclab/visualizer")) or _has_rtx
+
+        # perform physics stepping
+        with Timer(name="physics_loop", msg="Physics loop took:", enable=DEBUG_TIMERS):
+            for _ in range(self.cfg.decimation):
+                self._sim_step_counter += 1
+                # set actions into buffers
+                # simulate
+                with Timer(name="apply_action", msg="Action processing step took:", enable=DEBUG_TIMERS):
+                    self._graph_cache.capture_or_replay("action", self.step_warp_action)
+
+                # write_data_to_sim runs outside the CUDA graph because _apply_actuator_model
+                # uses torch ops (wp.to_torch + torch arithmetic) that cross CUDA streams.
+                with Timer(name="write_data_to_sim_loop", msg="Write data to sim (loop) took:", enable=DEBUG_TIMERS):
+                    self.scene.write_data_to_sim()
+
+                with Timer(name="simulate", msg="Newton simulation step took:", enable=DEBUG_TIMERS):
+                    self.sim.step(render=False)
+                # render between steps only if the GUI or an RTX sensor needs it
+                # note: we assume the render interval to be the shortest accepted rendering interval.
+                #    If a camera needs rendering at a faster frequency, this will lead to unexpected behavior.
+                if self._sim_step_counter % self.cfg.sim.render_interval == 0 and is_rendering:
+                    self.sim.render()
+                # update buffers at sim dt
+                with Timer(name="scene_update", msg="Scene update took:", enable=DEBUG_TIMERS):
+                    self.scene.update(dt=self.physics_dt)
+
+        self.common_step_counter += 1  # total step (common for all envs)
+        with Timer(name="end_pre_graph", msg="End pre-graph took:", enable=DEBUG_TIMERS):
+            self._graph_cache.capture_or_replay("end_pre", self._step_warp_end_pre)
+        # write_data_to_sim runs uncaptured — it uses torch ops that cross CUDA streams.
+        with Timer(name="write_data_to_sim_post", msg="Write data to sim (post-reset) took:", enable=DEBUG_TIMERS):
+            self.scene.write_data_to_sim()
+        with Timer(name="end_post_graph", msg="End post-graph took:", enable=DEBUG_TIMERS):
+            self._graph_cache.capture_or_replay("end_post", self._step_warp_end_post)
+
+        # Visualization hook — runs after CUDA graph scope. Override in subclass
+        # to update markers or other non-graphable visual elements.
+        with Timer(name="visualize", msg="Visualize took:", enable=DEBUG_TIMERS):
+            self._post_step_visualize()
+
+        # return observations, rewards, resets and extras
+        return (
+            {"policy": self.torch_obs_buf.clone()},
+            self.torch_reward_buf,
+            self.torch_reset_terminated,
+            self.torch_reset_time_outs,
+            self.extras,
+        )
+
+    def _post_step_visualize(self) -> None:
+        """Hook for updating visualization markers after CUDA graph scope.
+
+        Override in subclass to update markers or other non-graphable visual
+        elements (e.g., those requiring wp.to_torch + .cpu().numpy()).
+        This runs every step, outside any CUDA graph capture.
+        """
+        pass
+
+    def step_warp_action(self) -> None:
+        self._apply_action()
+        # Note: scene.write_data_to_sim() is called separately outside the CUDA graph
+        # capture scope because it invokes _apply_actuator_model() which uses torch
+        # arithmetic (wp.to_torch + torch ops). This would cause a CUDA stream crossing
+        # error during graph capture. Moving it outside is safe since it runs every step.
+
+    def _step_warp_end_pre(self) -> None:
+        """Capturable portion before write_data_to_sim (pure warp kernels)."""
+        wp.launch(
+            add_to_env,
+            dim=self.num_envs,
+            inputs=[
+                self._episode_length_buf_wp,
+                1,
+            ],
+        )
+        self._get_dones()
+        self._get_rewards()
+
+        # -- reset envs that terminated/timed-out and log the episode information
+        self._reset_idx(mask=self.reset_buf)
+
+    def _step_warp_end_post(self) -> None:
+        """Capturable portion after write_data_to_sim (pure warp kernels)."""
+        # if sensors are added to the scene, make sure we render to reflect changes in reset
+        # if self.sim.has_rtx_sensors() and self.cfg.rerender_on_reset:
+        #    self.sim.render()
+
+        # TODO We could split it out.
+        # post-step: step interval event
+        # if self.cfg.events:
+        #    if "interval" in self.event_manager.available_modes:
+        #        self.event_manager.apply(mode="interval", dt=self.step_dt)
+
+        # update observations
+        self._get_observations()
+
+        # add observation noise
+        # note: we apply no noise to the state space (since it is used for critic networks)
+        # if self.cfg.observation_noise_model:
+        #    self.obs_buf["policy"] = self._observation_noise_model(self.obs_buf["policy"])
+
+    @staticmethod
+    def seed(seed: int = -1) -> int:
+        """Set the seed for the environment.
+
+        Args:
+            seed: The seed for random generator. Defaults to -1.
+
+        Returns:
+            The seed used for random generator.
+        """
+        # set seed for replicator
+        try:
+            import omni.replicator.core as rep
+
+            rep.set_global_seed(seed)
+        except ModuleNotFoundError:
+            pass
+        # set seed for torch and other libraries
+        return configure_seed(seed)
+
+    def render(self, recompute: bool = False) -> np.ndarray | None:
+        """Run rendering without stepping through the physics.
+
+        By convention, if mode is:
+
+        - **human**: Render to the current display and return nothing. Usually for human consumption.
+        - **rgb_array**: Return an numpy.ndarray with shape (x, y, 3), representing RGB values for an
+          x-by-y pixel image, suitable for turning into a video.
+
+        Args:
+            recompute: Whether to force a render even if the simulator has already rendered the scene.
+                Defaults to False.
+
+        Returns:
+            The rendered image as a numpy array if mode is "rgb_array". Otherwise, returns None.
+
+        Raises:
+            RuntimeError: If mode is set to "rgb_data" and simulation render mode does not support it.
+                In this case, the simulation render mode must be set to ``RenderMode.PARTIAL_RENDERING``
+                or ``RenderMode.FULL_RENDERING``.
+            NotImplementedError: If an unsupported rendering mode is specified.
+        """
+        # run a rendering step of the simulator
+        # if we have rtx sensors, we do not need to render again sim
+        if not (hasattr(self.sim, "has_rtx_sensors") and self.sim.has_rtx_sensors()) and not recompute:
+            self.sim.render()
+        # decide the rendering mode
+        if self.render_mode == "human" or self.render_mode is None:
+            return None
+        elif self.render_mode == "rgb_array":
+            # check that if any render could have happened
+            has_gui = bool(self.sim.get_setting("/isaaclab/has_gui"))
+            offscreen_render = bool(self.sim.get_setting("/isaaclab/render/offscreen"))
+            # Rendering is possible if we have GUI or offscreen rendering enabled
+            can_render = has_gui or offscreen_render
+
+            if not can_render:
+                render_mode_name = "NO_GUI_OR_RENDERING"
+                raise RuntimeError(
+                    f"Cannot render '{self.render_mode}' when the simulation render mode is"
+                    f" '{render_mode_name}'. Please set the simulation render mode"
+                    " to:'PARTIAL_RENDERING' or"
+                    " 'FULL_RENDERING'. If running headless, make"
+                    " sure --enable_cameras is set."
+                )
+            # create the annotator if it does not exist
+            if not hasattr(self, "_rgb_annotator"):
+                import omni.replicator.core as rep
+
+                # create render product
+                self._render_product = rep.create.render_product(
+                    self.cfg.viewer.cam_prim_path, self.cfg.viewer.resolution
+                )
+                # create rgb annotator -- used to read data from the render product
+                self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu")
+                self._rgb_annotator.attach([self._render_product])
+            # obtain the rgb data
+            rgb_data = self._rgb_annotator.get_data()
+            # convert to numpy array
+            rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape)
+            # return the rgb data
+            # note: initially the renerer is warming up and returns empty data
+            if rgb_data.size == 0:
+                return np.zeros((self.cfg.viewer.resolution[1], self.cfg.viewer.resolution[0], 3), dtype=np.uint8)
+            else:
+                return rgb_data[:, :, :3]
+        else:
+            raise NotImplementedError(
+                f"Render mode '{self.render_mode}' is not supported. Please use: {self.metadata['render_modes']}."
+            )
+
+    def close(self):
+        """Cleanup for the environment."""
+        if not self._is_closed:
+            # close entities related to the environment
+            # note: this is order-sensitive to avoid any dangling references
+            if self.cfg.events:
+                del self.event_manager
+            del self.scene
+            if self.viewport_camera_controller is not None:
+                del self.viewport_camera_controller
+
+            # # clear callbacks and instance
+            # if float(".".join(get_version()[2])) >= 5:
+            #     if self.cfg.sim.create_stage_in_memory:
+            #         # detach physx stage
+            #         omni.physx.get_physx_simulation_interface().detach_stage()
+            #         self.sim.stop()
+            #         self.sim.clear()
+
+            # self.sim.clear_all_callbacks()
+            self.sim.clear_instance()
+
+            # destroy the window
+            if self._window is not None:
+                self._window = None
+            # update closing status
+            self._is_closed = True
+
+    """
+    Operations - Debug Visualization.
+    """
+
+    def set_debug_vis(self, debug_vis: bool) -> bool:
+        """Toggles the environment debug visualization.
+
+        Args:
+            debug_vis: Whether to visualize the environment debug visualization.
+
+        Returns:
+            Whether the debug visualization was successfully set. False if the environment
+            does not support debug visualization.
+        """
+        # check if debug visualization is supported
+        if not self.has_debug_vis_implementation:
+            return False
+        # toggle debug visualization objects
+        self._set_debug_vis_impl(debug_vis)
+        # toggle debug visualization handles
+        if debug_vis:
+            import omni.kit.app
+
+            # create a subscriber for the post update event if it doesn't exist
+            if self._debug_vis_handle is None:
+                app_interface = omni.kit.app.get_app_interface()
+                self._debug_vis_handle = app_interface.get_post_update_event_stream().create_subscription_to_pop(
+                    lambda event, obj=weakref.proxy(self): obj._debug_vis_callback(event)
+                )
+        else:
+            # remove the subscriber if it exists
+            if self._debug_vis_handle is not None:
+                self._debug_vis_handle.unsubscribe()
+                self._debug_vis_handle = None
+        # return success
+        return True
+
+    """
+    Helper functions.
+    """
+
+    def _configure_gym_env_spaces(self):
+        """Configure the action and observation spaces for the Gym environment."""
+        # show deprecation message and overwrite configuration
+        if self.cfg.num_actions is not None:
+            logger.warning("DirectRLEnvCfg.num_actions is deprecated. Use DirectRLEnvCfg.action_space instead.")
+            if isinstance(self.cfg.action_space, type(MISSING)):
+                self.cfg.action_space = self.cfg.num_actions
+        if self.cfg.num_observations is not None:
+            logger.warning(
+                "DirectRLEnvCfg.num_observations is deprecated. Use DirectRLEnvCfg.observation_space instead."
+            )
+            if isinstance(self.cfg.observation_space, type(MISSING)):
+                self.cfg.observation_space = self.cfg.num_observations
+        if self.cfg.num_states is not None:
+            logger.warning("DirectRLEnvCfg.num_states is deprecated. Use DirectRLEnvCfg.state_space instead.")
+            if isinstance(self.cfg.state_space, type(MISSING)):
+                self.cfg.state_space = self.cfg.num_states
+
+        # set up spaces
+        self.single_observation_space = gym.spaces.Dict()
+        self.single_observation_space["policy"] = spec_to_gym_space(self.cfg.observation_space)
+        self.single_action_space = spec_to_gym_space(self.cfg.action_space)
+
+        # batch the spaces for vectorized environments
+        self.observation_space = gym.vector.utils.batch_space(self.single_observation_space["policy"], self.num_envs)
+        self.action_space = gym.vector.utils.batch_space(self.single_action_space, self.num_envs)
+
+        # optional state space for asymmetric actor-critic architectures
+        self.state_space = None
+        if self.cfg.state_space:
+            self.single_observation_space["critic"] = spec_to_gym_space(self.cfg.state_space)
+            self.state_space = gym.vector.utils.batch_space(self.single_observation_space["critic"], self.num_envs)
+
+        # instantiate actions (needed for tasks for which the observations computation is dependent on the actions)
+        self.actions = sample_space(self.single_action_space, self.sim.device, batch_size=self.num_envs, fill_value=0)
+
+    def _reset_idx(self, mask: wp.array | None = None):
+        """Reset environments based on a boolean mask.
+
+        Args:
+            mask: Boolean mask indicating which environments to reset.
+                Shape is (num_envs,). If None, all environments are reset.
+        """
+        if mask is None:
+            mask = self._ALL_ENV_MASK
+        self.scene.reset(env_ids=None, env_mask=mask)
+
+        # apply events such as randomization for environments that need a reset
+        # if self.cfg.events:
+        #    if "reset" in self.event_manager.available_modes:
+        #        env_step_count = self._sim_step_counter // self.cfg.decimation
+        #        self.event_manager.apply(mode="reset", env_ids=env_ids, global_env_step_count=env_step_count)
+
+        # reset noise models
+        # if self.cfg.action_noise_model:
+        #    self._action_noise_model.reset(env_ids)
+        # if self.cfg.observation_noise_model:
+        #    self._observation_noise_model.reset(env_ids)
+
+        # reset the episode length buffer
+        wp.launch(
+            zero_mask_int32,
+            dim=self.num_envs,
+            inputs=[
+                mask,
+                self._episode_length_buf_wp,
+            ],
+        )
+
+    """
+    Implementation-specific functions.
+    """
+
+    def _setup_scene(self):
+        """Setup the scene for the environment.
+
+        This function is responsible for creating the scene objects and setting up the scene for the environment.
+        The scene creation can happen through :class:`isaaclab.scene.InteractiveSceneCfg` or through
+        directly creating the scene objects and registering them with the scene manager.
+
+        We leave the implementation of this function to the derived classes. If the environment does not require
+        any explicit scene setup, the function can be left empty.
+        """
+        pass
+
+    @abstractmethod
+    def _pre_physics_step(self, actions: wp.array) -> None:
+        """Pre-process actions before stepping through the physics.
+
+        This function is responsible for pre-processing the actions before stepping through the physics.
+        It is called before the physics stepping (which is decimated).
+
+        Args:
+            actions: The actions to apply on the environment. Shape is (num_envs, action_dim).
+        """
+        raise NotImplementedError(f"Please implement the '_pre_physics_step' method for {self.__class__.__name__}.")
+
+    @abstractmethod
+    def _apply_action(self) -> None:
+        """Apply actions to the simulator.
+
+        This function is responsible for applying the actions to the simulator. It is called at each
+        physics time-step. Must be pure warp (no torch ops) to be CUDA graph capturable.
+        """
+        raise NotImplementedError(f"Please implement the '_apply_action' method for {self.__class__.__name__}.")
+
+    @abstractmethod
+    def _get_observations(self) -> dict:
+        """Compute and return the observations for the environment.
+
+        Returns:
+            The observations dictionary, e.g. ``{"policy": tensor}``.
+        """
+        raise NotImplementedError(f"Please implement the '_get_observations' method for {self.__class__.__name__}.")
+
+    def _get_states(self) -> VecEnvObs | None:
+        """Compute and return the states for the environment.
+
+        The state-space is used for asymmetric actor-critic architectures. It is configured
+        using the :attr:`DirectRLEnvCfg.state_space` parameter.
+
+        Returns:
+            The states for the environment. If the environment does not have a state-space, the function
+            returns a None.
+        """
+        return None  # noqa: R501
+
+    @abstractmethod
+    def _get_rewards(self) -> None:
+        """Compute the rewards for the environment.
+
+        Writes results into the reward buffer (e.g., ``self.reward_buf``).
+        """
+        raise NotImplementedError(f"Please implement the '_get_rewards' method for {self.__class__.__name__}.")
+
+    @abstractmethod
+    def _get_dones(self) -> None:
+        """Compute the done flags for the environment.
+
+        Writes results into the done buffers (e.g., ``self.reset_terminated``, ``self.reset_time_outs``).
+        """
+        raise NotImplementedError(f"Please implement the '_get_dones' method for {self.__class__.__name__}.")
+
+    def _set_debug_vis_impl(self, debug_vis: bool):
+        """Set debug visualization into visualization objects.
+
+        This function is responsible for creating the visualization objects if they don't exist
+        and input ``debug_vis`` is True. If the visualization objects exist, the function should
+        set their visibility into the stage.
+        """
+        raise NotImplementedError(f"Debug visualization is not implemented for {self.__class__.__name__}.")
diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py b/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py
new file mode 100644
index 000000000000..f792b994f67b
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/envs/interactive_scene_warp.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Warp-native interactive scene with env_mask support for reset."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import warp as wp
+
+from isaaclab.scene import InteractiveScene
+
+
+class InteractiveSceneWarp(InteractiveScene):
+    """Interactive scene with warp-native env_mask support for reset.
+
+    Extends :class:`InteractiveScene` to accept a boolean warp mask for selective resets,
+    avoiding the need to convert between env_ids and masks.
+    """
+
+    def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None = None):
+        """Reset scene entities using either env_ids or a boolean env_mask.
+
+        Args:
+            env_ids: The indices of the environments to reset. Defaults to None (all instances).
+            env_mask: Boolean warp mask of shape (num_envs,). Defaults to None.
+        """
+        # -- assets (support env_mask)
+        for articulation in self._articulations.values():
+            articulation.reset(env_ids, env_mask=env_mask)
+        for deformable_object in self._deformable_objects.values():
+            deformable_object.reset(env_ids)
+        for rigid_object in self._rigid_objects.values():
+            rigid_object.reset(env_ids, env_mask=env_mask)
+        for surface_gripper in self._surface_grippers.values():
+            surface_gripper.reset(env_ids)
+        for rigid_object_collection in self._rigid_object_collections.values():
+            rigid_object_collection.reset(env_ids, env_mask=env_mask)
+        # -- sensors (no env_mask support)
+        for sensor in self._sensors.values():
+            sensor.reset(env_ids)
diff --git a/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py
new file mode 100644
index 000000000000..d28381b15b76
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/envs/utils/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Sub-package for environment utils."""
diff --git a/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py b/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py
new file mode 100644
index 000000000000..d2ac09bd4f70
--- /dev/null
+++ b/source/isaaclab_experimental/isaaclab_experimental/utils/warp_graph_cache.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Warp CUDA graph capture-or-replay utility."""
+
+from collections.abc import Callable
+from typing import Any
+
+import warp as wp
+
+
+class WarpGraphCache:
+    """Caches Warp CUDA graphs by stage name: captures on first call, replays after.
+
+    On the first call for a given stage, the function is recorded into a CUDA
+    graph and then **immediately replayed** so the graph is validated right away.
+    The return value from the capture run is cached and returned on every
+    subsequent replay, ensuring captured stages return the same references
+    (e.g. tensor views) as eager stages.
+
+    Usage::
+
+        cache = WarpGraphCache()
+        result = cache.capture_or_replay("my_stage", my_warp_function)
+        # uncaptured work here ...
+        result2 = cache.capture_or_replay("my_stage_post", my_other_function)
+    """
+
+    def __init__(self):
+        self._graphs: dict[str, Any] = {}
+        self._results: dict[str, Any] = {}
+
+    def capture_or_replay(
+        self,
+        stage: str,
+        fn: Callable[..., Any],
+        args: tuple = (),
+        kwargs: dict[str, Any] | None = None,
+    ) -> Any:
+        """Capture *fn* into a CUDA graph on the first call, then replay.
+
+        Args:
+            stage: Unique name identifying this captured scope.
+            fn: The callable to capture. Must contain only CUDA-graph-safe
+                operations (pure warp kernels, no Python-level branching on
+                GPU data).
+            args: Positional arguments forwarded to *fn*. Defaults to ``()``.
+            kwargs: Keyword arguments forwarded to *fn*. Defaults to ``None``.
+
+        Returns:
+            The cached return value from the first (capture) invocation.
+        """
+        if kwargs is None:
+            kwargs = {}
+        graph = self._graphs.get(stage)
+        if graph is None:
+            with wp.ScopedCapture() as capture:
+                result = fn(*args, **kwargs)
+            self._graphs[stage] = capture.graph
+            self._results[stage] = result
+        wp.capture_launch(self._graphs[stage])
+        return self._results[stage]
+
+    def invalidate(self, stage: str | None = None) -> None:
+        """Drop cached graph(s). If *stage* is ``None``, drop all."""
+        if stage is None:
+            self._graphs.clear()
+            self._results.clear()
+        else:
+            self._graphs.pop(stage, None)
+            self._results.pop(stage, None)
diff --git a/source/isaaclab_experimental/pyproject.toml b/source/isaaclab_experimental/pyproject.toml
new file mode 100644
index 000000000000..d90ac3536f16
--- /dev/null
+++ b/source/isaaclab_experimental/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools", "wheel", "toml"]
+build-backend = "setuptools.build_meta"
diff --git a/source/isaaclab_experimental/setup.py b/source/isaaclab_experimental/setup.py
new file mode 100644
index 000000000000..d44ab67d789e
--- /dev/null
+++ b/source/isaaclab_experimental/setup.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Installation script for the 'isaaclab_experimental' python package."""
+
+import os
+
+import toml
+from setuptools import find_packages, setup
+
+# Obtain the extension data from the extension.toml file
+EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__))
+# Read the extension.toml file
+EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml"))
+
+# Minimum dependencies required prior to installation
+INSTALL_REQUIRES = [
+    "toml",
+]
+
+# Installation operation
+setup(
+    name="isaaclab_experimental",
+    author="Isaac Lab Project Developers",
+    maintainer="Isaac Lab Project Developers",
+    url=EXTENSION_TOML_DATA["package"]["repository"],
+    version=EXTENSION_TOML_DATA["package"]["version"],
+    description=EXTENSION_TOML_DATA["package"]["description"],
+    keywords=EXTENSION_TOML_DATA["package"]["keywords"],
+    license="BSD-3-Clause",
+    include_package_data=True,
+    python_requires=">=3.10",
+    install_requires=INSTALL_REQUIRES,
+    packages=find_packages(),
+    classifiers=[
+        "Natural Language :: English",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Isaac Sim :: 5.0.0",
+    ],
+    zip_safe=False,
+)
diff --git a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
index e7e52ca43a96..7e753c0f7a2a 100644
--- a/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
+++ b/source/isaaclab_rl/isaaclab_rl/rsl_rl/vecenv_wrapper.py
@@ -47,7 +47,16 @@ def __init__(self, env: ManagerBasedRLEnv | DirectRLEnv, clip_actions: float | N
         # NOTE: import here (not at module level) to avoid loading heavy env classes before Isaac Sim is initialized.
         from isaaclab.envs import DirectRLEnv, ManagerBasedEnv, ManagerBasedRLEnv
 
-        if not isinstance(env.unwrapped, (ManagerBasedRLEnv, ManagerBasedEnv, DirectRLEnv)):
+        try:
+            from isaaclab_experimental.envs import DirectRLEnvWarp
+        except ImportError:
+            DirectRLEnvWarp = None
+
+        allowed_types = (ManagerBasedRLEnv, ManagerBasedEnv, DirectRLEnv)
+        if DirectRLEnvWarp is not None:
+            allowed_types += (DirectRLEnvWarp,)
+
+        if not isinstance(env.unwrapped, allowed_types):
             raise ValueError(
                 "The environment must be inherited from ManagerBasedRLEnv or DirectRLEnv. Environment type:"
                 f" {type(env)}"
diff --git a/source/isaaclab_tasks_experimental/config/extension.toml b/source/isaaclab_tasks_experimental/config/extension.toml
new file mode 100644
index 000000000000..75de8d6da0ef
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/config/extension.toml
@@ -0,0 +1,22 @@
+[package]
+
+# Note: Semantic Versioning is used: https://semver.org/
+version = "0.0.1"
+
+# Description
+title = "Experimental environments for IsaacLab"
+description="Extension containing suite of experimental environments for robot learning."
+readme  = "docs/README.md"
+repository = "https://github.com/isaac-sim/IsaacLab"
+category = "robotics"
+keywords = ["robotics", "rl", "il", "learning"]
+
+[dependencies]
+"isaaclab" = {}
+"isaaclab_assets" = {}
+
+[core]
+reloadable = false
+
+[[python.module]]
+name = "isaaclab_tasks.experimental"
diff --git a/source/isaaclab_tasks_experimental/docs/README.md b/source/isaaclab_tasks_experimental/docs/README.md
new file mode 100644
index 000000000000..d9e681518d64
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/docs/README.md
@@ -0,0 +1,3 @@
+# Isaac Lab: Experimental Environment Suite
+
+Experimental environments for robot learning built on top of Isaac Lab.
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py
new file mode 100644
index 000000000000..918c41d73d7b
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/__init__.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Package containing task implementations for various robotic environments."""
+
+import os
+import toml
+
+# Conveniences to other module directories via relative paths
+ISAACLAB_TASKS_EXPERIMENTAL_EXT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
+"""Path to the extension source directory."""
+
+ISAACLAB_TASKS_EXPERIMENTAL_METADATA = toml.load(
+    os.path.join(ISAACLAB_TASKS_EXPERIMENTAL_EXT_DIR, "config", "extension.toml")
+)
+"""Extension metadata dictionary parsed from the extension.toml file."""
+
+# Configure the module-level variables
+__version__ = ISAACLAB_TASKS_EXPERIMENTAL_METADATA["package"]["version"]
+
+##
+# Register Gym environments.
+##
+
+from isaaclab_tasks.utils import import_packages
+
+# The blacklist is used to prevent importing configs from sub-packages
+_BLACKLIST_PKGS = ["utils", ".mdp"]
+# Import all configs in this package
+import_packages(__name__, _BLACKLIST_PKGS)
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py
new file mode 100644
index 000000000000..3e2b7945ebde
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Direct workflow environments.
+"""
+
+import gymnasium as gym
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py
new file mode 100644
index 000000000000..c954081b9c54
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/__init__.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Allegro Inhand Manipulation environment.
+"""
+
+import gymnasium as gym
+
+##
+# Register Gym environments.
+##
+
+inhand_task_entry = "isaaclab_tasks_experimental.direct.inhand_manipulation"
+stable_agents = "isaaclab_tasks.direct.allegro_hand.agents"
+
+gym.register(
+    id="Isaac-Repose-Cube-Allegro-Direct-Warp-v0",
+    entry_point=f"{inhand_task_entry}.inhand_manipulation_warp_env:InHandManipulationWarpEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.allegro_hand_warp_env_cfg:AllegroHandWarpEnvCfg",
+        "rl_games_cfg_entry_point": f"{stable_agents}:rl_games_ppo_cfg.yaml",
+        "rsl_rl_cfg_entry_point": f"{stable_agents}.rsl_rl_ppo_cfg:AllegroHandPPORunnerCfg",
+        "skrl_cfg_entry_point": f"{stable_agents}:skrl_ppo_cfg.yaml",
+    },
+)
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py
new file mode 100644
index 000000000000..2d607d475329
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/allegro_hand/allegro_hand_warp_env_cfg.py
@@ -0,0 +1,136 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+
+from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg  # , RigidObjectCfg
+from isaaclab.envs import DirectRLEnvCfg
+from isaaclab.markers import VisualizationMarkersCfg
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.sim import SimulationCfg
+from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg
+from isaaclab.utils import configclass
+from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
+
+from isaaclab_assets.robots.allegro import ALLEGRO_HAND_CFG
+
+
+@configclass
+class AllegroHandWarpEnvCfg(DirectRLEnvCfg):
+    # env
+    decimation = 4
+    episode_length_s = 10.0
+    action_space = 16
+    observation_space = 124  # (full)
+    state_space = 0
+    asymmetric_obs = False
+    obs_type = "full"
+
+    solver_cfg = MJWarpSolverCfg(
+        solver="newton",
+        integrator="implicitfast",
+        njmax=80,
+        nconmax=70,
+        impratio=10.0,
+        cone="elliptic",
+        update_data_interval=2,
+        iterations=100,
+        ls_iterations=15,
+        ls_parallel=False,
+        # save_to_mjcf="AllegroHand.xml",
+    )
+
+    newton_cfg = NewtonCfg(
+        solver_cfg=solver_cfg,
+        num_substeps=2,
+        debug_mode=False,
+    )
+    # simulation
+    sim: SimulationCfg = SimulationCfg(
+        dt=1 / 120,
+        render_interval=decimation,
+        physics_material=RigidBodyMaterialCfg(
+            static_friction=1.0,
+            dynamic_friction=1.0,
+        ),
+        physics=newton_cfg,
+    )
+    # robot
+    robot_cfg: ArticulationCfg = ALLEGRO_HAND_CFG.replace(prim_path="/World/envs/env_.*/Robot")
+
+    actuated_joint_names = [
+        "index_joint_0",
+        "middle_joint_0",
+        "ring_joint_0",
+        "thumb_joint_0",
+        "index_joint_1",
+        "index_joint_2",
+        "index_joint_3",
+        "middle_joint_1",
+        "middle_joint_2",
+        "middle_joint_3",
+        "ring_joint_1",
+        "ring_joint_2",
+        "ring_joint_3",
+        "thumb_joint_1",
+        "thumb_joint_2",
+        "thumb_joint_3",
+    ]
+    fingertip_body_names = [
+        "index_link_3",
+        "middle_link_3",
+        "ring_link_3",
+        "thumb_link_3",
+    ]
+
+    # in-hand object
+    object_cfg: ArticulationCfg = ArticulationCfg(
+        prim_path="/World/envs/env_.*/object",
+        spawn=sim_utils.UsdFileCfg(
+            usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd",
+            mass_props=sim_utils.MassPropertiesCfg(density=400.0),
+            scale=(1.2, 1.2, 1.2),
+        ),
+        # FIXME: it does seem to be a bug for ArticulationCfg for handling empty joint list
+        init_state=ArticulationCfg.InitialStateCfg(
+            pos=(0.0, -0.17, 0.565), rot=(0.0, 0.0, 0.0, 1.0), joint_pos={}, joint_vel={}
+        ),
+        actuators={},
+        articulation_root_prim_path="",
+    )
+    # goal object
+    goal_object_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg(
+        prim_path="/Visuals/goal_marker",
+        markers={
+            "goal": sim_utils.UsdFileCfg(
+                usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Blocks/DexCube/dex_cube_instanceable.usd",
+                scale=(1.2, 1.2, 1.2),
+            )
+        },
+    )
+    # scene
+    scene: InteractiveSceneCfg = InteractiveSceneCfg(
+        num_envs=8192, env_spacing=0.75, replicate_physics=True, clone_in_fabric=True
+    )
+    # reset
+    reset_position_noise = 0.01  # range of position at reset
+    reset_dof_pos_noise = 0.2  # range of dof pos at reset
+    reset_dof_vel_noise = 0.0  # range of dof vel at reset
+    # reward scales
+    dist_reward_scale = -10.0
+    rot_reward_scale = 1.0
+    rot_eps = 0.1
+    action_penalty_scale = -0.0002
+    reach_goal_bonus = 250
+    fall_penalty = 0
+    fall_dist = 0.24
+    vel_obs_scale = 0.2
+    success_tolerance = 0.2
+    max_consecutive_success = 0
+    av_factor = 0.1
+    act_moving_average = 1.0
+    force_torque_obs_scale = 10.0
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/__init__.py
new file mode 100644
index 000000000000..d5f4b459a8a6
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/__init__.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Ant locomotion environment.
+"""
+
+import gymnasium as gym
+
+##
+# Register Gym environments.
+##
+
+stable_agents = "isaaclab_tasks.direct.ant.agents"
+
+gym.register(
+    id="Isaac-Ant-Direct-Warp-v0",
+    entry_point=f"{__name__}.ant_env_warp:AntWarpEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.ant_env_warp:AntWarpEnvCfg",
+        "rl_games_cfg_entry_point": f"{stable_agents}:rl_games_ppo_cfg.yaml",
+        "rsl_rl_cfg_entry_point": f"{stable_agents}.rsl_rl_ppo_cfg:AntPPORunnerCfg",
+        "skrl_cfg_entry_point": f"{stable_agents}:skrl_ppo_cfg.yaml",
+    },
+)
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/ant_env_warp.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/ant_env_warp.py
new file mode 100644
index 000000000000..a2c8f91fa3db
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/ant/ant_env_warp.py
@@ -0,0 +1,91 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg
+from isaaclab.envs import DirectRLEnvCfg
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.sim import SimulationCfg
+from isaaclab.terrains import TerrainImporterCfg
+from isaaclab.utils import configclass
+
+from isaaclab_assets import ANT_CFG
+
+from isaaclab_tasks_experimental.direct.locomotion.locomotion_env_warp import LocomotionWarpEnv
+
+
+@configclass
+class AntWarpEnvCfg(DirectRLEnvCfg):
+    # env
+    episode_length_s = 15.0
+    decimation = 2
+    action_scale = 0.5
+    action_space = 8
+    observation_space = 36
+    state_space = 0
+
+    solver_cfg = MJWarpSolverCfg(
+        njmax=45,
+        nconmax=25,
+        cone="pyramidal",
+        integrator="implicitfast",
+        impratio=1,
+    )
+    newton_cfg = NewtonCfg(
+        solver_cfg=solver_cfg,
+        num_substeps=1,
+        debug_mode=False,
+        use_cuda_graph=True,
+    )
+
+    # simulation
+    sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation, physics=newton_cfg)
+    terrain = TerrainImporterCfg(
+        prim_path="/World/ground",
+        terrain_type="plane",
+        collision_group=-1,
+        physics_material=sim_utils.RigidBodyMaterialCfg(
+            friction_combine_mode="average",
+            restitution_combine_mode="average",
+            static_friction=1.0,
+            dynamic_friction=1.0,
+            restitution=0.0,
+        ),
+        debug_vis=False,
+    )
+
+    # scene
+    scene: InteractiveSceneCfg = InteractiveSceneCfg(
+        num_envs=4096, env_spacing=4.0, replicate_physics=True, clone_in_fabric=True
+    )
+
+    # robot
+    robot: ArticulationCfg = ANT_CFG.replace(prim_path="/World/envs/env_.*/Robot")
+    joint_gears: list = [15, 15, 15, 15, 15, 15, 15, 15]
+
+    heading_weight: float = 0.5
+    up_weight: float = 0.1
+
+    energy_cost_scale: float = 0.05
+    actions_cost_scale: float = 0.005
+    alive_reward_scale: float = 0.5
+    dof_vel_scale: float = 0.2
+
+    death_cost: float = -2.0
+    termination_height: float = 0.31
+
+    angular_velocity_scale: float = 1.0
+    contact_force_scale: float = 0.1
+
+
+class AntWarpEnv(LocomotionWarpEnv):
+    cfg: AntWarpEnvCfg
+
+    def __init__(self, cfg: AntWarpEnvCfg, render_mode: str | None = None, **kwargs):
+        super().__init__(cfg, render_mode, **kwargs)
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/__init__.py
new file mode 100644
index 000000000000..19b9f26a287c
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/__init__.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Cartpole balancing environment.
+"""
+
+import gymnasium as gym
+
+##
+# Register Gym environments.
+##
+
+stable_agents = "isaaclab_tasks.direct.cartpole.agents"
+
+gym.register(
+    id="Isaac-Cartpole-Direct-Warp-v0",
+    entry_point=f"{__name__}.cartpole_warp_env:CartpoleWarpEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.cartpole_warp_env:CartpoleWarpEnvCfg",
+        "rl_games_cfg_entry_point": f"{stable_agents}:rl_games_ppo_cfg.yaml",
+        "rsl_rl_cfg_entry_point": f"{stable_agents}.rsl_rl_ppo_cfg:CartpolePPORunnerCfg",
+        "skrl_cfg_entry_point": f"{stable_agents}:skrl_ppo_cfg.yaml",
+        "sb3_cfg_entry_point": f"{stable_agents}:sb3_ppo_cfg.yaml",
+    },
+)
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/cartpole_warp_env.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/cartpole_warp_env.py
new file mode 100644
index 000000000000..a9670c6b8dd4
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/cartpole/cartpole_warp_env.py
@@ -0,0 +1,357 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+import math
+
+import warp as wp
+from isaaclab_experimental.envs import DirectRLEnvWarp
+from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import Articulation, ArticulationCfg
+from isaaclab.envs import DirectRLEnvCfg
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.sim import SimulationCfg
+from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
+from isaaclab.utils import configclass
+
+from isaaclab_assets.robots.cartpole import CARTPOLE_CFG
+
+
+@configclass
+class CartpoleWarpEnvCfg(DirectRLEnvCfg):
+    # env
+    decimation = 2
+    episode_length_s = 5.0
+    action_scale = 100.0  # [N]
+    action_space = 1
+    observation_space = 4
+    state_space = 0
+
+    solver_cfg = MJWarpSolverCfg(
+        njmax=5,
+        nconmax=3,
+        cone="pyramidal",
+        integrator="implicitfast",
+        impratio=1,
+    )
+
+    newton_cfg = NewtonCfg(
+        solver_cfg=solver_cfg,
+        num_substeps=1,
+        debug_mode=False,
+        use_cuda_graph=True,
+    )
+
+    # simulation
+    sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation, physics=newton_cfg)
+
+    # robot
+    robot_cfg: ArticulationCfg = CARTPOLE_CFG.replace(prim_path="/World/envs/env_.*/Robot")
+    cart_dof_name = "slider_to_cart"
+    pole_dof_name = "cart_to_pole"
+
+    # scene
+    scene: InteractiveSceneCfg = InteractiveSceneCfg(
+        num_envs=4096, env_spacing=4.0, replicate_physics=True, clone_in_fabric=True
+    )
+
+    # reset
+    max_cart_pos = 3.0  # the cart is reset if it exceeds that position [m]
+    initial_pole_angle_range = [-0.25, 0.25]  # the range in which the pole angle is sampled from on reset [x pi rad]
+
+    # reward scales
+    rew_scale_alive = 1.0
+    rew_scale_terminated = -2.0
+    rew_scale_pole_pos = -1.0
+    rew_scale_cart_vel = -0.01
+    rew_scale_pole_vel = -0.005
+
+
+@wp.kernel
+def get_observations(
+    joint_pos: wp.array2d(dtype=wp.float32),
+    joint_vel: wp.array2d(dtype=wp.float32),
+    cart_dof_idx: wp.int32,
+    pole_dof_idx: wp.int32,
+    observations: wp.array(dtype=wp.vec4f),
+):
+    env_index = wp.tid()
+    observations[env_index][0] = joint_pos[env_index, pole_dof_idx]
+    observations[env_index][1] = joint_vel[env_index, pole_dof_idx]
+    observations[env_index][2] = joint_pos[env_index, cart_dof_idx]
+    observations[env_index][3] = joint_vel[env_index, cart_dof_idx]
+
+
+@wp.kernel
+def update_actions(
+    input_actions: wp.array2d(dtype=wp.float32),
+    actions: wp.array2d(dtype=wp.float32),
+    action_scale: wp.float32,
+    cart_dof_idx: wp.int32,
+):
+    env_index = wp.tid()
+    actions[env_index, cart_dof_idx] = action_scale * input_actions[env_index, 0]
+
+
+@wp.kernel
+def get_dones(
+    joint_pos: wp.array2d(dtype=wp.float32),
+    episode_length_buf: wp.array(dtype=wp.int32),
+    cart_dof_idx: wp.int32,
+    pole_dof_idx: wp.int32,
+    max_episode_length: wp.int32,
+    max_cart_pos: wp.float32,
+    out_of_bounds: wp.array(dtype=wp.bool),
+    time_out: wp.array(dtype=wp.bool),
+    reset: wp.array(dtype=wp.bool),
+):
+    env_index = wp.tid()
+    out_of_bounds[env_index] = (wp.abs(joint_pos[env_index, cart_dof_idx]) > max_cart_pos) or (
+        wp.abs(joint_pos[env_index, pole_dof_idx]) > math.pi / 2.0
+    )
+    time_out[env_index] = episode_length_buf[env_index] >= (max_episode_length - 1)
+    reset[env_index] = out_of_bounds[env_index] or time_out[env_index]
+
+
+@wp.func
+def compute_rew_alive(rew_scale_alive: wp.float32, reset_terminated: bool) -> wp.float32:
+    if reset_terminated:
+        return wp.float32(0.0)
+    return rew_scale_alive
+
+
+@wp.func
+def compute_rew_termination(rew_scale_terminated: wp.float32, reset_terminated: bool) -> wp.float32:
+    if reset_terminated:
+        return rew_scale_terminated
+    return wp.float32(0.0)
+
+
+@wp.func
+def compute_rew_pole_pos(
+    rew_scale_pole_pos: wp.float32,
+    pole_pos: wp.float32,
+) -> wp.float32:
+    return rew_scale_pole_pos * pole_pos * pole_pos
+
+
+@wp.func
+def compute_rew_cart_vel(
+    rew_scale_cart_vel: wp.float32,
+    cart_vel: wp.float32,
+) -> wp.float32:
+    return rew_scale_cart_vel * wp.abs(cart_vel)
+
+
+@wp.func
+def compute_rew_pole_vel(
+    rew_scale_pole_vel: wp.float32,
+    pole_vel: wp.float32,
+) -> wp.float32:
+    return rew_scale_pole_vel * wp.abs(pole_vel)
+
+
+@wp.kernel
+def compute_rewards(
+    rew_scale_alive: wp.float32,
+    rew_scale_terminated: wp.float32,
+    rew_scale_pole_pos: wp.float32,
+    rew_scale_cart_vel: wp.float32,
+    rew_scale_pole_vel: wp.float32,
+    joint_pos: wp.array2d(dtype=wp.float32),
+    joint_vel: wp.array2d(dtype=wp.float32),
+    cart_dof_idx: wp.int32,
+    pole_dof_idx: wp.int32,
+    reset_terminated: wp.array(dtype=wp.bool),
+    reward: wp.array(dtype=wp.float32),
+):
+    env_index = wp.tid()
+    reward[env_index] = (
+        compute_rew_alive(rew_scale_alive, reset_terminated[env_index])
+        + compute_rew_termination(rew_scale_terminated, reset_terminated[env_index])
+        + compute_rew_pole_pos(rew_scale_pole_pos, joint_pos[env_index, pole_dof_idx])
+        + compute_rew_cart_vel(rew_scale_cart_vel, joint_vel[env_index, cart_dof_idx])
+        + compute_rew_pole_vel(rew_scale_pole_vel, joint_vel[env_index, pole_dof_idx])
+    )
+
+
+@wp.kernel
+def reset(
+    default_joint_pos: wp.array2d(dtype=wp.float32),
+    default_joint_vel: wp.array2d(dtype=wp.float32),
+    joint_pos: wp.array2d(dtype=wp.float32),
+    joint_vel: wp.array2d(dtype=wp.float32),
+    cart_dof_idx: wp.int32,
+    pole_dof_idx: wp.int32,
+    initial_pose_angle_range: wp.vec2f,
+    env_mask: wp.array(dtype=wp.bool),
+    state: wp.array(dtype=wp.uint32),
+):
+    env_index = wp.tid()
+    if env_mask[env_index]:
+        joint_pos[env_index, cart_dof_idx] = default_joint_pos[env_index, cart_dof_idx]
+        joint_pos[env_index, pole_dof_idx] = default_joint_pos[env_index, pole_dof_idx] + wp.randf(
+            state[env_index], initial_pose_angle_range[0] * wp.pi, initial_pose_angle_range[1] * wp.pi
+        )
+        joint_vel[env_index, cart_dof_idx] = default_joint_vel[env_index, cart_dof_idx]
+        joint_vel[env_index, pole_dof_idx] = default_joint_vel[env_index, pole_dof_idx]
+        state[env_index] += wp.uint32(1)
+
+
+@wp.kernel
+def initialize_state(
+    state: wp.array(dtype=wp.uint32),
+    seed: wp.int32,
+):
+    env_index = wp.tid()
+    state[env_index] = wp.rand_init(seed, env_index)
+
+
+class CartpoleWarpEnv(DirectRLEnvWarp):
+    cfg: CartpoleWarpEnvCfg
+
+    def __init__(self, cfg: CartpoleWarpEnvCfg, render_mode: str | None = None, **kwargs) -> None:
+        super().__init__(cfg, render_mode, **kwargs)
+
+        # Get the indices (develop API: find_joints returns (indices, names))
+        self._cart_dof_idx, _ = self.cartpole.find_joints(self.cfg.cart_dof_name)
+        self._pole_dof_idx, _ = self.cartpole.find_joints(self.cfg.pole_dof_name)
+
+        self.action_scale = self.cfg.action_scale
+
+        # Simulation bindings
+        # Note: these are direct memory views into the Newton simulation data, they should not be modified directly
+        self.joint_pos = self.cartpole.data.joint_pos
+        self.joint_vel = self.cartpole.data.joint_vel
+
+        # Buffers
+        self.observations = wp.zeros((self.num_envs), dtype=wp.vec4f, device=self.device)
+        self.actions = wp.zeros((self.num_envs, self.cartpole.num_joints), dtype=wp.float32, device=self.device)
+        self.rewards = wp.zeros((self.num_envs), dtype=wp.float32, device=self.device)
+        self.states = wp.zeros((self.num_envs), dtype=wp.uint32, device=self.device)
+
+        if self.cfg.seed is None:
+            self.cfg.seed = -1
+
+        wp.launch(
+            initialize_state,
+            dim=self.num_envs,
+            inputs=[
+                self.states,
+                self.cfg.seed,
+            ],
+        )
+
+        # Bind torch buffers to warp buffers
+        self.torch_obs_buf = wp.to_torch(self.observations)
+        self.torch_reward_buf = wp.to_torch(self.rewards)
+        self.torch_reset_terminated = wp.to_torch(self.reset_terminated)
+        self.torch_reset_time_outs = wp.to_torch(self.reset_time_outs)
+        self.torch_episode_length_buf = self.episode_length_buf  # already a torch tensor via wp.to_torch
+
+    def _setup_scene(self) -> None:
+        self.cartpole = Articulation(self.cfg.robot_cfg)
+        # add ground plane
+        spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
+        # clone and replicate
+        self.scene.clone_environments(copy_from_source=False)
+        # we need to explicitly filter collisions for CPU simulation
+        if self.device == "cpu":
+            self.scene.filter_collisions(global_prim_paths=[])
+        # add articulation to scene
+        self.scene.articulations["cartpole"] = self.cartpole
+        # add lights
+        light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
+        light_cfg.func("/World/Light", light_cfg)
+
+    def _pre_physics_step(self, actions: wp.array) -> None:
+        wp.launch(
+            update_actions,
+            dim=self.num_envs,
+            inputs=[
+                actions,
+                self.actions,
+                self.action_scale,
+                self._cart_dof_idx[0],
+            ],
+        )
+
+    def _apply_action(self) -> None:
+        self.cartpole.set_joint_effort_target_mask(target=self.actions)
+
+    def _get_observations(self) -> dict:
+        wp.launch(
+            get_observations,
+            dim=self.num_envs,
+            inputs=[
+                self.joint_pos,
+                self.joint_vel,
+                self._cart_dof_idx[0],
+                self._pole_dof_idx[0],
+                self.observations,
+            ],
+        )
+        return {"policy": self.torch_obs_buf}
+
+    def _get_rewards(self) -> None:
+        wp.launch(
+            compute_rewards,
+            dim=self.num_envs,
+            inputs=[
+                self.cfg.rew_scale_alive,
+                self.cfg.rew_scale_terminated,
+                self.cfg.rew_scale_pole_pos,
+                self.cfg.rew_scale_cart_vel,
+                self.cfg.rew_scale_pole_vel,
+                self.joint_pos,
+                self.joint_vel,
+                self._cart_dof_idx[0],
+                self._pole_dof_idx[0],
+                self.reset_terminated,
+                self.rewards,
+            ],
+        )
+
+    def _get_dones(self) -> None:
+        wp.launch(
+            get_dones,
+            dim=self.num_envs,
+            inputs=[
+                self.joint_pos,
+                self._episode_length_buf_wp,
+                self._cart_dof_idx[0],
+                self._pole_dof_idx[0],
+                self.max_episode_length,
+                self.cfg.max_cart_pos,
+                self.reset_terminated,
+                self.reset_time_outs,
+                self.reset_buf,
+            ],
+        )
+
+    def _reset_idx(self, mask: wp.array | None = None) -> None:
+        if mask is None:
+            mask = self._ALL_ENV_MASK
+
+        super()._reset_idx(mask)
+
+        wp.launch(
+            reset,
+            dim=self.num_envs,
+            inputs=[
+                self.cartpole.data.default_joint_pos,
+                self.cartpole.data.default_joint_vel,
+                self.joint_pos,
+                self.joint_vel,
+                self._cart_dof_idx[0],
+                self._pole_dof_idx[0],
+                self.cfg.initial_pole_angle_range,
+                mask,
+                self.states,
+            ],
+        )
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/__init__.py
new file mode 100644
index 000000000000..8dcb10a57bfd
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/__init__.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Humanoid locomotion environment.
+"""
+
+import gymnasium as gym
+
+##
+# Register Gym environments.
+##
+
+stable_agents = "isaaclab_tasks.direct.humanoid.agents"
+
+gym.register(
+    id="Isaac-Humanoid-Direct-Warp-v0",
+    entry_point=f"{__name__}.humanoid_warp_env:HumanoidWarpEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.humanoid_warp_env:HumanoidWarpEnvCfg",
+        "rl_games_cfg_entry_point": f"{stable_agents}:rl_games_ppo_cfg.yaml",
+        "rsl_rl_cfg_entry_point": f"{stable_agents}.rsl_rl_ppo_cfg:HumanoidPPORunnerCfg",
+        "skrl_cfg_entry_point": f"{stable_agents}:skrl_ppo_cfg.yaml",
+    },
+)
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/humanoid_warp_env.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/humanoid_warp_env.py
new file mode 100644
index 000000000000..1dc4d23f7814
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/humanoid/humanoid_warp_env.py
@@ -0,0 +1,114 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+from isaaclab_newton.physics import MJWarpSolverCfg, NewtonCfg
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg
+from isaaclab.envs import DirectRLEnvCfg
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.sim import SimulationCfg
+from isaaclab.terrains import TerrainImporterCfg
+from isaaclab.utils import configclass
+
+from isaaclab_assets import HUMANOID_CFG
+
+from isaaclab_tasks_experimental.direct.locomotion.locomotion_env_warp import LocomotionWarpEnv
+
+
+@configclass
+class HumanoidWarpEnvCfg(DirectRLEnvCfg):
+    # env
+    episode_length_s = 15.0
+    decimation = 2
+    action_scale = 1.0
+    action_space = 21
+    observation_space = 75
+    state_space = 0
+
+    solver_cfg = MJWarpSolverCfg(
+        njmax=80,
+        nconmax=25,
+        cone="pyramidal",
+        integrator="implicitfast",
+        update_data_interval=2,
+        impratio=1,
+    )
+    newton_cfg = NewtonCfg(
+        solver_cfg=solver_cfg,
+        num_substeps=2,
+        debug_mode=False,
+        use_cuda_graph=True,
+    )
+
+    # simulation
+    sim: SimulationCfg = SimulationCfg(dt=1 / 120, render_interval=decimation, physics=newton_cfg)
+    terrain = TerrainImporterCfg(
+        prim_path="/World/ground",
+        terrain_type="plane",
+        collision_group=-1,
+        physics_material=sim_utils.RigidBodyMaterialCfg(
+            friction_combine_mode="average",
+            restitution_combine_mode="average",
+            static_friction=1.0,
+            dynamic_friction=1.0,
+            restitution=0.0,
+        ),
+        debug_vis=False,
+    )
+
+    # scene
+    scene: InteractiveSceneCfg = InteractiveSceneCfg(
+        num_envs=4096, env_spacing=4.0, replicate_physics=True, clone_in_fabric=True
+    )
+
+    # robot
+    robot: ArticulationCfg = HUMANOID_CFG.replace(prim_path="/World/envs/env_.*/Robot")
+    joint_gears: list = [
+        67.5000,  # left_upper_arm
+        67.5000,  # left_upper_arm
+        45.0000,  # left_lower_arm
+        67.5000,  # lower_waist
+        67.5000,  # lower_waist
+        67.5000,  # pelvis
+        45.0000,  # left_thigh: x
+        135.0000,  # left_thigh: y
+        45.0000,  # left_thigh: z
+        90.0000,  # left_knee
+        22.5,  # left_foot
+        22.5,  # left_foot
+        45.0000,  # right_thigh: x
+        135.0000,  # right_thigh: y
+        45.0000,  # right_thigh: z
+        90.0000,  # right_knee
+        22.5,  # right_foot
+        22.5,  # right_foot
+        67.5000,  # right_upper_arm
+        67.5000,  # right_upper_arm
+        45.0000,  # right_lower_arm
+    ]
+
+    heading_weight: float = 0.5
+    up_weight: float = 0.1
+
+    energy_cost_scale: float = 0.05
+    actions_cost_scale: float = 0.01
+    alive_reward_scale: float = 2.0
+    dof_vel_scale: float = 0.1
+
+    death_cost: float = -1.0
+    termination_height: float = 0.8
+
+    angular_velocity_scale: float = 0.25
+    contact_force_scale: float = 0.01
+
+
+class HumanoidWarpEnv(LocomotionWarpEnv):
+    cfg: HumanoidWarpEnvCfg
+
+    def __init__(self, cfg: HumanoidWarpEnvCfg, render_mode: str | None = None, **kwargs):
+        super().__init__(cfg, render_mode, **kwargs)
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py
new file mode 100644
index 000000000000..460a30569089
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py
new file mode 100644
index 000000000000..8b142cf8e2ee
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/inhand_manipulation/inhand_manipulation_warp_env.py
@@ -0,0 +1,980 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import TYPE_CHECKING
+
+import torch
+import warp as wp
+from isaaclab_experimental.envs import DirectRLEnvWarp
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import Articulation  # , RigidObject
+from isaaclab.markers import VisualizationMarkers
+from isaaclab.sim.spawners.from_files import GroundPlaneCfg, spawn_ground_plane
+
+if TYPE_CHECKING:
+    from isaaclab_tasks_experimental.direct.allegro_hand.allegro_hand_warp_env_cfg import AllegroHandWarpEnvCfg
+
+
+@wp.kernel
+def initialize_rng_state(
+    # input
+    seed: wp.int32,
+    # output
+    state: wp.array(dtype=wp.uint32),
+):
+    env_id = wp.tid()
+    state[env_id] = wp.rand_init(seed, wp.int32(env_id))
+
+
+@wp.kernel
+def apply_actions_to_targets(
+    # input
+    actions: wp.array2d(dtype=wp.float32),
+    lower_limits: wp.array2d(dtype=wp.float32),
+    upper_limits: wp.array2d(dtype=wp.float32),
+    actuated_dof_indices: wp.array(dtype=wp.int32),
+    act_moving_average: wp.float32,
+    # input/output
+    prev_targets: wp.array2d(dtype=wp.float32),
+    # output
+    cur_targets: wp.array2d(dtype=wp.float32),
+):
+    env_id, i = wp.tid()
+    dof_id = actuated_dof_indices[i]
+
+    # clamp and scale action to target range
+    a = wp.clamp(actions[env_id, i], wp.float32(-1.0), wp.float32(1.0))
+    lower = lower_limits[env_id, dof_id]
+    upper = upper_limits[env_id, dof_id]
+    t = scale(a, lower, upper)
+
+    # smoothing and boundary clamping
+    t = act_moving_average * t + (wp.float32(1.0) - act_moving_average) * prev_targets[env_id, dof_id]
+    t = wp.clamp(t, lower, upper)
+
+    # update targets
+    cur_targets[env_id, dof_id] = t
+    prev_targets[env_id, dof_id] = t
+
+
+@wp.kernel
+def reset_target_pose(
+    # input
+    env_mask: wp.array(dtype=wp.bool),
+    x_unit_vec: wp.vec3f,
+    y_unit_vec: wp.vec3f,
+    env_origins: wp.array(dtype=wp.vec3f),
+    goal_pos: wp.array(dtype=wp.vec3f),
+    # input/output
+    rng_state: wp.array(dtype=wp.uint32),
+    # output
+    goal_rot: wp.array(dtype=wp.quatf),
+    reset_goal_buf: wp.array(dtype=wp.bool),
+    goal_pos_w: wp.array(dtype=wp.vec3f),
+):
+    env_id = wp.tid()
+    if env_mask[env_id]:
+        rand0 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+        rand1 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+
+        goal_rot[env_id] = randomize_rotation(rand0, rand1, x_unit_vec, y_unit_vec)
+        reset_goal_buf[env_id] = False
+
+        # Warp-native addition: goal position in world frame.
+        goal_pos_w[env_id] = goal_pos[env_id] + env_origins[env_id]
+
+
+@wp.kernel
+def reset_object(
+    # input
+    default_root_pose: wp.array(dtype=wp.transformf),
+    env_origins: wp.array(dtype=wp.vec3f),
+    reset_position_noise: wp.float32,
+    x_unit_vec: wp.vec3f,
+    y_unit_vec: wp.vec3f,
+    env_mask: wp.array(dtype=wp.bool),
+    # input/output
+    rng_state: wp.array(dtype=wp.uint32),
+    # output
+    root_pose_w: wp.array(dtype=wp.transformf),
+    root_vel_w: wp.array(dtype=wp.spatial_vectorf),
+):
+    env_id = wp.tid()
+    if env_mask[env_id]:
+        nx = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+        ny = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+        nz = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+
+        pos_noise = reset_position_noise * wp.vec3f(nx, ny, nz)
+        base_pos = wp.transform_get_translation(default_root_pose[env_id])
+        pos_w = base_pos + env_origins[env_id] + pos_noise
+
+        rand0 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+        rand1 = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+        rng_state[env_id] += wp.uint32(1)
+        rot_w = randomize_rotation(rand0, rand1, x_unit_vec, y_unit_vec)
+
+        # The following should be equivalent, but consider using write_root_pose_to_sim and write_root_velocity_to_sim
+        root_pose_w[env_id] = wp.transform(pos_w, rot_w)
+        root_vel_w[env_id] = wp.spatial_vectorf(
+            wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0), wp.float32(0.0)
+        )
+
+
+@wp.kernel
+def reset_hand(
+    # input
+    default_joint_pos: wp.array2d(dtype=wp.float32),
+    default_joint_vel: wp.array2d(dtype=wp.float32),
+    lower_limits: wp.array2d(dtype=wp.float32),
+    upper_limits: wp.array2d(dtype=wp.float32),
+    reset_dof_pos_noise: wp.float32,
+    reset_dof_vel_noise: wp.float32,
+    env_mask: wp.array(dtype=wp.bool),
+    num_dofs: wp.int32,
+    # input/output
+    rng_state: wp.array(dtype=wp.uint32),
+    # output
+    joint_pos: wp.array2d(dtype=wp.float32),
+    joint_vel: wp.array2d(dtype=wp.float32),
+    prev_targets: wp.array2d(dtype=wp.float32),
+    cur_targets: wp.array2d(dtype=wp.float32),
+    hand_dof_targets: wp.array2d(dtype=wp.float32),
+):
+    env_id = wp.tid()
+    if env_mask[env_id]:
+        # Each env runs sequentially inside this kernel (avoids RNG races across DOFs).
+        for dof_id in range(num_dofs):
+            dof_pos_noise = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+            rng_state[env_id] += wp.uint32(1)
+
+            delta_max = upper_limits[env_id, dof_id] - default_joint_pos[env_id, dof_id]
+            delta_min = lower_limits[env_id, dof_id] - default_joint_pos[env_id, dof_id]
+            rand_delta = delta_min + (delta_max - delta_min) * 0.5 * dof_pos_noise
+            pos = default_joint_pos[env_id, dof_id] + reset_dof_pos_noise * rand_delta
+
+            dof_vel_noise = wp.randf(rng_state[env_id], wp.float32(-1.0), wp.float32(1.0))
+            rng_state[env_id] += wp.uint32(1)
+            vel = default_joint_vel[env_id, dof_id] + reset_dof_vel_noise * dof_vel_noise
+
+            # The following lines should be equivalent to the following:
+            # self.hand.write_joint_state_to_sim(dof_pos, dof_vel, env_ids=env_ids)
+            joint_pos[env_id, dof_id] = pos
+            joint_vel[env_id, dof_id] = vel
+
+            prev_targets[env_id, dof_id] = pos
+            cur_targets[env_id, dof_id] = pos
+            hand_dof_targets[env_id, dof_id] = pos
+
+
+@wp.kernel
+def reset_successes(
+    # input
+    env_mask: wp.array(dtype=wp.bool),
+    # output
+    successes: wp.array(dtype=wp.float32),
+):
+    env_id = wp.tid()
+    if env_mask[env_id]:
+        successes[env_id] = wp.float32(0.0)
+
+
+@wp.kernel
+def compute_intermediate_values(
+    # input
+    body_pos_w: wp.array2d(dtype=wp.vec3f),
+    body_quat_w: wp.array2d(dtype=wp.quatf),
+    body_vel_w: wp.array2d(dtype=wp.spatial_vectorf),
+    finger_bodies: wp.array(dtype=wp.int32),
+    env_origins: wp.array(dtype=wp.vec3f),
+    object_root_pose_w: wp.array(dtype=wp.transformf),
+    object_root_vel_w: wp.array(dtype=wp.spatial_vectorf),
+    num_fingertips: wp.int32,
+    # output
+    fingertip_pos: wp.array2d(dtype=wp.vec3f),
+    fingertip_rot: wp.array2d(dtype=wp.quatf),
+    fingertip_velocities: wp.array2d(dtype=wp.spatial_vectorf),
+    object_pose: wp.array(dtype=wp.transformf),
+    object_vels: wp.array(dtype=wp.spatial_vectorf),
+):
+    env_id = wp.tid()
+
+    for i in range(num_fingertips):
+        body_id = finger_bodies[i]
+        fingertip_pos[env_id, i] = body_pos_w[env_id, body_id] - env_origins[env_id]
+        fingertip_rot[env_id, i] = body_quat_w[env_id, body_id]
+        fingertip_velocities[env_id, i] = body_vel_w[env_id, body_id]
+
+    # Store object pose in env-local frame (translation only; orientation unchanged).
+    pos_w = wp.transform_get_translation(object_root_pose_w[env_id])
+    pos = pos_w - env_origins[env_id]
+    rot = wp.transform_get_rotation(object_root_pose_w[env_id])
+    object_pose[env_id] = wp.transform(pos, rot)
+    object_vels[env_id] = object_root_vel_w[env_id]
+
+
+@wp.kernel
+def get_dones(
+    # input
+    max_episode_length: wp.int32,
+    object_pose: wp.array(dtype=wp.transformf),
+    in_hand_pos: wp.array(dtype=wp.vec3f),
+    goal_rot: wp.array(dtype=wp.quatf),
+    fall_dist: wp.float32,
+    success_tolerance: wp.float32,
+    max_consecutive_success: wp.int32,
+    successes: wp.array(dtype=wp.float32),
+    # input/output
+    episode_length_buf: wp.array(dtype=wp.int32),
+    # output
+    out_of_reach: wp.array(dtype=wp.bool),
+    time_out: wp.array(dtype=wp.bool),
+    reset: wp.array(dtype=wp.bool),
+):
+    env_id = wp.tid()
+
+    object_pos = wp.transform_get_translation(object_pose[env_id])
+    object_rot = wp.transform_get_rotation(object_pose[env_id])
+
+    goal_dist = wp.length(object_pos - in_hand_pos[env_id])
+    out_of_reach[env_id] = goal_dist >= fall_dist
+
+    max_success_reached = False
+    if max_consecutive_success > 0:
+        # Reset progress (episode length buf) on goal envs if max_consecutive_success > 0
+        rot_dist = rotation_distance(object_rot, goal_rot[env_id])
+        if wp.abs(rot_dist) <= success_tolerance:
+            episode_length_buf[env_id] = 0
+        max_success_reached = successes[env_id] >= wp.float32(max_consecutive_success)
+
+    time_out[env_id] = episode_length_buf[env_id] >= (max_episode_length - 1) or max_success_reached
+    reset[env_id] = out_of_reach[env_id] or time_out[env_id]
+
+
+@wp.kernel
+def compute_reduced_observations(
+    # input
+    fingertip_pos: wp.array2d(dtype=wp.vec3f),
+    object_pose: wp.array(dtype=wp.transformf),
+    goal_rot: wp.array(dtype=wp.quatf),
+    actions: wp.array2d(dtype=wp.float32),
+    num_fingertips: wp.int32,
+    action_dim: wp.int32,
+    # output
+    observations: wp.array2d(dtype=wp.float32),
+):
+    env_id = wp.tid()
+
+    obj_pos = wp.transform_get_translation(object_pose[env_id])
+    obj_rot = wp.transform_get_rotation(object_pose[env_id])
+
+    idx = int(0)
+    for i in range(num_fingertips):
+        observations[env_id, idx + 0] = fingertip_pos[env_id, i][0]
+        observations[env_id, idx + 1] = fingertip_pos[env_id, i][1]
+        observations[env_id, idx + 2] = fingertip_pos[env_id, i][2]
+        idx += 3
+
+    observations[env_id, idx + 0] = obj_pos[0]
+    observations[env_id, idx + 1] = obj_pos[1]
+    observations[env_id, idx + 2] = obj_pos[2]
+    idx += 3
+
+    rel = obj_rot * wp.quat_inverse(goal_rot[env_id])
+    observations[env_id, idx + 0] = rel[0]
+    observations[env_id, idx + 1] = rel[1]
+    observations[env_id, idx + 2] = rel[2]
+    observations[env_id, idx + 3] = rel[3]
+    idx += 4
+
+    for i in range(action_dim):
+        observations[env_id, idx + i] = actions[env_id, i]
+
+
+@wp.kernel
+def compute_full_observations(
+    # input
+    hand_dof_pos: wp.array2d(dtype=wp.float32),
+    hand_dof_vel: wp.array2d(dtype=wp.float32),
+    hand_dof_lower_limits: wp.array2d(dtype=wp.float32),
+    hand_dof_upper_limits: wp.array2d(dtype=wp.float32),
+    vel_obs_scale: wp.float32,
+    object_pose: wp.array(dtype=wp.transformf),
+    object_vels: wp.array(dtype=wp.spatial_vectorf),
+    in_hand_pos: wp.array(dtype=wp.vec3f),
+    goal_rot: wp.array(dtype=wp.quatf),
+    fingertip_pos: wp.array2d(dtype=wp.vec3f),
+    fingertip_rot: wp.array2d(dtype=wp.quatf),
+    fingertip_velocities: wp.array2d(dtype=wp.spatial_vectorf),
+    actions: wp.array2d(dtype=wp.float32),
+    num_hand_dofs: wp.int32,
+    num_fingertips: wp.int32,
+    action_dim: wp.int32,
+    # output
+    observations: wp.array2d(dtype=wp.float32),
+):
+    env_id = wp.tid()
+
+    # hand
+    for i in range(num_hand_dofs):
+        observations[env_id, i] = unscale(
+            hand_dof_pos[env_id, i], hand_dof_lower_limits[env_id, i], hand_dof_upper_limits[env_id, i]
+        )
+
+    offset = num_hand_dofs
+    for i in range(num_hand_dofs):
+        observations[env_id, offset + i] = vel_obs_scale * hand_dof_vel[env_id, i]
+    offset += num_hand_dofs
+
+    # object
+    obj_pos = wp.transform_get_translation(object_pose[env_id])
+    obj_rot = wp.transform_get_rotation(object_pose[env_id])
+
+    observations[env_id, offset + 0] = obj_pos[0]
+    observations[env_id, offset + 1] = obj_pos[1]
+    observations[env_id, offset + 2] = obj_pos[2]
+    offset += 3
+
+    observations[env_id, offset + 0] = obj_rot[0]
+    observations[env_id, offset + 1] = obj_rot[1]
+    observations[env_id, offset + 2] = obj_rot[2]
+    observations[env_id, offset + 3] = obj_rot[3]
+    offset += 4
+
+    # spatial_vectorf layout: [0:3]=angular, [3:6]=linear
+    # torch reference order: linear (unscaled) first, then angular (scaled)
+    observations[env_id, offset + 0] = object_vels[env_id][3]
+    observations[env_id, offset + 1] = object_vels[env_id][4]
+    observations[env_id, offset + 2] = object_vels[env_id][5]
+    offset += 3
+
+    observations[env_id, offset + 0] = vel_obs_scale * object_vels[env_id][0]
+    observations[env_id, offset + 1] = vel_obs_scale * object_vels[env_id][1]
+    observations[env_id, offset + 2] = vel_obs_scale * object_vels[env_id][2]
+    offset += 3
+
+    # goal
+    observations[env_id, offset + 0] = in_hand_pos[env_id][0]
+    observations[env_id, offset + 1] = in_hand_pos[env_id][1]
+    observations[env_id, offset + 2] = in_hand_pos[env_id][2]
+    offset += 3
+
+    observations[env_id, offset + 0] = goal_rot[env_id][0]
+    observations[env_id, offset + 1] = goal_rot[env_id][1]
+    observations[env_id, offset + 2] = goal_rot[env_id][2]
+    observations[env_id, offset + 3] = goal_rot[env_id][3]
+    offset += 4
+
+    rel = obj_rot * wp.quat_inverse(goal_rot[env_id])
+    observations[env_id, offset + 0] = rel[0]
+    observations[env_id, offset + 1] = rel[1]
+    observations[env_id, offset + 2] = rel[2]
+    observations[env_id, offset + 3] = rel[3]
+    offset += 4
+
+    # fingertips
+    for i in range(num_fingertips):
+        observations[env_id, offset + 0] = fingertip_pos[env_id, i][0]
+        observations[env_id, offset + 1] = fingertip_pos[env_id, i][1]
+        observations[env_id, offset + 2] = fingertip_pos[env_id, i][2]
+        offset += 3
+
+    for i in range(num_fingertips):
+        observations[env_id, offset + 0] = fingertip_rot[env_id, i][0]
+        observations[env_id, offset + 1] = fingertip_rot[env_id, i][1]
+        observations[env_id, offset + 2] = fingertip_rot[env_id, i][2]
+        observations[env_id, offset + 3] = fingertip_rot[env_id, i][3]
+        offset += 4
+
+    for i in range(num_fingertips):
+        for j in range(6):
+            observations[env_id, offset + j] = fingertip_velocities[env_id, i][j]
+        offset += 6
+
+    # actions
+    for i in range(action_dim):
+        observations[env_id, offset + i] = actions[env_id, i]
+
+
+@wp.kernel
+def sanitize_and_print_once(
+    # input/output
+    obs: wp.array(dtype=wp.float32),
+    printed_flag: wp.array(dtype=wp.int32),
+):
+    i = wp.tid()
+    v = obs[i]
+
+    if not wp.isfinite(v):
+        # Try to claim the "print token"
+        if wp.atomic_cas(printed_flag, 0, 0, 1) == 0:
+            wp.printf("Non-finite values in observations")
+
+        obs[i] = wp.float32(0.0)
+
+
+@wp.kernel
+def compute_rewards(
+    # input
+    reset_buf: wp.array(dtype=wp.bool),
+    object_pose: wp.array(dtype=wp.transformf),
+    target_pos: wp.array(dtype=wp.vec3f),
+    target_rot: wp.array(dtype=wp.quatf),
+    dist_reward_scale: wp.float32,
+    rot_reward_scale: wp.float32,
+    rot_eps: wp.float32,
+    actions: wp.array2d(dtype=wp.float32),
+    action_penalty_scale: wp.float32,
+    success_tolerance: wp.float32,
+    reach_goal_bonus: wp.float32,
+    fall_dist: wp.float32,
+    fall_penalty: wp.float32,
+    action_dim: wp.int32,
+    # input/output
+    reset_goal_buf: wp.array(dtype=wp.bool),
+    successes: wp.array(dtype=wp.float32),
+    num_resets_out: wp.array(dtype=wp.float32),
+    finished_cons_successes_out: wp.array(dtype=wp.float32),
+    # output
+    reward_out: wp.array(dtype=wp.float32),
+):
+    env_id = wp.tid()
+
+    obj_pos = wp.transform_get_translation(object_pose[env_id])
+    obj_rot = wp.transform_get_rotation(object_pose[env_id])
+
+    goal_dist = wp.length(obj_pos - target_pos[env_id])
+    rot_dist = rotation_distance(obj_rot, target_rot[env_id])
+
+    dist_rew = goal_dist * dist_reward_scale
+    rot_rew = wp.float32(1.0) / (wp.abs(rot_dist) + rot_eps) * rot_reward_scale
+
+    action_penalty = wp.float32(0.0)
+    for i in range(action_dim):
+        action_penalty += actions[env_id, i] * actions[env_id, i]
+
+    # Total reward is: position distance + orientation alignment + action regularization + success bonus + fall penalty
+    reward = dist_rew + rot_rew + action_penalty * action_penalty_scale
+
+    # Find out which envs hit the goal and update successes count
+    reached = wp.abs(rot_dist) <= success_tolerance
+    goal_resets = reached or reset_goal_buf[env_id]
+    reset_goal_buf[env_id] = goal_resets
+    if goal_resets:
+        successes[env_id] = successes[env_id] + wp.float32(1.0)
+
+    # Success bonus: orientation is within `success_tolerance` of goal orientation
+    if goal_resets:
+        reward = reward + reach_goal_bonus
+
+    # Fall penalty: distance to the goal is larger than a threshold
+    if goal_dist >= fall_dist:
+        reward = reward + fall_penalty
+
+    # Consecutive-successes stats (mirrors Torch env):
+    #   resets = torch.where(goal_dist >= fall_dist, ones_like(reset_buf), reset_buf)
+    resets = (goal_dist >= fall_dist) or reset_buf[env_id]
+    if resets:
+        wp.atomic_add(num_resets_out, 0, wp.float32(1.0))
+        wp.atomic_add(finished_cons_successes_out, 0, successes[env_id])
+
+    reward_out[env_id] = reward
+
+
+@wp.kernel
+def update_consecutive_successes_from_stats(
+    # input
+    num_resets: wp.array(dtype=wp.float32),
+    finished_cons_successes: wp.array(dtype=wp.float32),
+    av_factor: wp.float32,
+    # input/output
+    consecutive_successes: wp.array(dtype=wp.float32),
+):
+    """Finalize the Torch env's EMA update for consecutive_successes and clear the accumulators."""
+    # single-thread kernel (dim=1)
+    n = num_resets[0]
+    prev = consecutive_successes[0]
+    if n > wp.float32(0.0):
+        consecutive_successes[0] = av_factor * (finished_cons_successes[0] / n) + (wp.float32(1.0) - av_factor) * prev
+
+
+@wp.func
+def scale(x: wp.float32, lower: wp.float32, upper: wp.float32) -> wp.float32:
+    return wp.float32(0.5) * (x + wp.float32(1.0)) * (upper - lower) + lower
+
+
+@wp.func
+def unscale(x: wp.float32, lower: wp.float32, upper: wp.float32) -> wp.float32:
+    return (wp.float32(2.0) * x - upper - lower) / (upper - lower)
+
+
+@wp.func
+def randomize_rotation(rand0: wp.float32, rand1: wp.float32, x_axis: wp.vec3f, y_axis: wp.vec3f) -> wp.quatf:
+    return wp.quat_from_axis_angle(x_axis, rand0 * wp.pi) * wp.quat_from_axis_angle(y_axis, rand1 * wp.pi)
+
+
+@wp.func
+def rotation_distance(object_rot: wp.quatf, target_rot: wp.quatf) -> wp.float32:
+    # Orientation alignment for the cube in hand and goal cube
+    quat_diff = object_rot * wp.quat_inverse(target_rot)
+    # Match Torch env convention: uses indices [1:4] for the vector part (see `rotation_distance` in Torch env).
+    v_norm = wp.sqrt(quat_diff[1] * quat_diff[1] + quat_diff[2] * quat_diff[2] + quat_diff[3] * quat_diff[3])
+    v_norm = wp.min(v_norm, wp.float32(1.0))
+    return wp.float32(2.0) * wp.asin(v_norm)
+
+
+class InHandManipulationWarpEnv(DirectRLEnvWarp):
+    cfg: AllegroHandWarpEnvCfg  # | ShadowHandWarpEnvCfg
+
+    # def __init__(self, cfg: AllegroHandWarpEnvCfg | ShadowHandWarpEnvCfg, render_mode: str | None = None, **kwargs):
+    def __init__(self, cfg: AllegroHandWarpEnvCfg, render_mode: str | None = None, **kwargs):
+        super().__init__(cfg, render_mode, **kwargs)
+
+        # ---------------------------------------------------------------------
+        # Constants
+        # ---------------------------------------------------------------------
+
+        # dof used for joint related init and sample
+        self.num_hand_dofs = self.hand.num_joints
+
+        # list of actuated joints
+        actuated_dof_indices: list[int] = list()
+        for joint_name in cfg.actuated_joint_names:
+            actuated_dof_indices.append(self.hand.joint_names.index(joint_name))
+        actuated_dof_indices.sort()
+        self.num_actuated_dofs = len(actuated_dof_indices)
+
+        # Warp index/mask helpers for kernels and articulation APIs.
+        self.actuated_dof_indices = wp.array(actuated_dof_indices, dtype=wp.int32, device=self.device)
+        actuated_mask = [False] * self.num_hand_dofs
+        for idx in actuated_dof_indices:
+            actuated_mask[idx] = True
+        self.actuated_dof_mask = wp.array(actuated_mask, dtype=wp.bool, device=self.device)
+
+        # finger bodies
+        finger_bodies: list[int] = list()
+        for body_name in self.cfg.fingertip_body_names:
+            finger_bodies.append(self.hand.body_names.index(body_name))
+        finger_bodies.sort()
+        self.num_fingertips = len(finger_bodies)
+        self.finger_bodies = wp.array(finger_bodies, dtype=wp.int32, device=self.device)
+
+        # joint limits
+        self.hand_dof_lower_limits = self.hand.data.joint_pos_limits_lower
+        self.hand_dof_upper_limits = self.hand.data.joint_pos_limits_upper
+
+        # unit vectors
+        self.x_unit_vec = wp.vec3f(1.0, 0.0, 0.0)
+        self.y_unit_vec = wp.vec3f(0.0, 1.0, 0.0)
+        self.z_unit_vec = wp.vec3f(0.0, 0.0, 1.0)
+
+        # Per-env origins (Warp view for kernels; Torch env uses `self.scene.env_origins` directly).
+        self.env_origins = wp.from_torch(self.scene.env_origins, dtype=wp.vec3f)
+
+        # ---------------------------------------------------------------------
+        # Warp buffers
+        # ---------------------------------------------------------------------
+
+        # buffers for position targets
+        self.hand_dof_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device)
+        self.prev_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device)
+        self.cur_targets = wp.zeros((self.num_envs, self.num_hand_dofs), dtype=wp.float32, device=self.device)
+
+        # track goal resets
+        self.reset_goal_buf = wp.zeros(self.num_envs, dtype=wp.bool, device=self.device)
+        # used to compare object position
+        self.in_hand_pos = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device)
+        # default goal positions
+        self.goal_rot = wp.zeros(self.num_envs, dtype=wp.quatf, device=self.device)
+        self.goal_pos = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device)
+        self.goal_pos_w = wp.zeros(self.num_envs, dtype=wp.vec3f, device=self.device)
+
+        # Initialize goal constants from Torch (avoid a one-off kernel launch).
+        default_root_pose = wp.to_torch(self.object.data.default_root_pose).to(self.device)
+        in_hand_pos = default_root_pose[:, 0:3].clone()
+        in_hand_pos[:, 2] -= 0.04
+        self.in_hand_pos.assign(wp.from_torch(in_hand_pos, dtype=wp.vec3f))
+
+        goal_pos = torch.tensor([-0.2, -0.45, 0.68], device=self.device, dtype=torch.float32).repeat((self.num_envs, 1))
+        self.goal_pos.assign(wp.from_torch(goal_pos, dtype=wp.vec3f))
+
+        goal_rot = torch.zeros((self.num_envs, 4), device=self.device, dtype=torch.float32)
+        goal_rot[:, 3] = 1.0  # (x, y, z, w)
+        self.goal_rot.assign(wp.from_torch(goal_rot, dtype=wp.quatf))
+
+        # initialize goal marker
+        self.goal_markers = VisualizationMarkers(self.cfg.goal_object_cfg)
+
+        # Reduction buffers for consecutive_successes update (Warp-only).
+        self._num_resets = wp.zeros(1, dtype=wp.float32, device=self.device)
+        self._finished_cons_successes = wp.zeros(1, dtype=wp.float32, device=self.device)
+        # track successes
+        self.successes = wp.zeros(self.num_envs, dtype=wp.float32, device=self.device)
+        self.consecutive_successes = wp.zeros(1, dtype=wp.float32, device=self.device)
+
+        # Persistent RL buffers (Warp).
+        self.actions = wp.zeros((self.num_envs, self.cfg.action_space), dtype=wp.float32, device=self.device)
+        self.observations = wp.zeros((self.num_envs, self.cfg.observation_space), dtype=wp.float32, device=self.device)
+        self.rewards = wp.zeros((self.num_envs,), dtype=wp.float32, device=self.device)
+        # Flag used as a print token for non-finite observations (Warp).
+        self.obs_nonfinite_flag = wp.zeros(1, dtype=wp.int32, device=self.device)
+
+        # Intermediate values (Warp) -- mirrors the Torch env's `_compute_intermediate_values` fields.
+        self.fingertip_pos = wp.zeros((self.num_envs, self.num_fingertips), dtype=wp.vec3f, device=self.device)
+        self.fingertip_rot = wp.zeros((self.num_envs, self.num_fingertips), dtype=wp.quatf, device=self.device)
+        self.fingertip_velocities = wp.zeros(
+            (self.num_envs, self.num_fingertips), dtype=wp.spatial_vectorf, device=self.device
+        )
+
+        self.object_pose = wp.zeros(self.num_envs, dtype=wp.transformf, device=self.device)
+        self.object_vels = wp.zeros(self.num_envs, dtype=wp.spatial_vectorf, device=self.device)
+
+        # RNG state (per-env) for randomizations in reset/goal resets.
+        self.rng_state = wp.zeros(self.num_envs, dtype=wp.uint32, device=self.device)
+        if self.cfg.seed is None:
+            self.cfg.seed = -1
+        wp.launch(
+            initialize_rng_state,
+            dim=self.num_envs,
+            inputs=[
+                self.cfg.seed,
+                self.rng_state,
+            ],
+            device=self.device,
+        )
+
+        # ---------------------------------------------------------------------
+        # Torch views / aliases
+        # ---------------------------------------------------------------------
+
+        # Bind torch buffers to warp buffers (same pattern as Warp Cartpole).
+        self.torch_obs_buf = wp.to_torch(self.observations)
+        self.torch_reward_buf = wp.to_torch(self.rewards)
+        self.torch_reset_terminated = wp.to_torch(self.reset_terminated)
+        self.torch_reset_time_outs = wp.to_torch(self.reset_time_outs)
+        self.torch_episode_length_buf = self.episode_length_buf  # already a torch tensor via wp.to_torch
+
+    def _setup_scene(self):
+        # add hand, in-hand object, and goal object
+        self.hand = Articulation(self.cfg.robot_cfg)
+        self.object = Articulation(self.cfg.object_cfg)
+        # add ground plane
+        spawn_ground_plane(prim_path="/World/ground", cfg=GroundPlaneCfg())
+        # clone and replicate (no need to filter for this environment)
+        self.scene.clone_environments(copy_from_source=False)
+        # add articulation to scene - we must register to scene to randomize with EventManager
+        self.scene.articulations["robot"] = self.hand
+        self.scene.articulations["object"] = self.object
+        # add lights
+        light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
+        light_cfg.func("/World/Light", light_cfg)
+
+    def _pre_physics_step(self, actions: wp.array) -> None:
+        # Store actions in a persistent Warp buffer (analogous to `actions.clone()` in the Torch env).
+        wp.copy(self.actions, actions)
+
+    def _apply_action(self) -> None:
+        wp.launch(
+            apply_actions_to_targets,
+            dim=(self.num_envs, self.num_actuated_dofs),
+            inputs=[
+                self.actions,
+                self.hand_dof_lower_limits,
+                self.hand_dof_upper_limits,
+                self.actuated_dof_indices,
+                self.cfg.act_moving_average,
+                self.prev_targets,
+                self.cur_targets,
+            ],
+            device=self.device,
+        )
+
+        # Apply position targets using mask method (CUDA graph safe).
+        # All joints are actuated for Allegro, so default masks (None = all) are correct.
+        self.hand.set_joint_position_target_mask(target=self.cur_targets)
+
+    def _get_observations(self) -> dict:
+        # if self.cfg.asymmetric_obs:
+        #    self.fingertip_force_sensors = self.hand.root_physx_view.get_link_incoming_joint_force()[
+        #        :, self.finger_bodies
+        #    ]
+        if self.cfg.obs_type == "openai":
+            self.compute_reduced_observations()
+        elif self.cfg.obs_type == "full":
+            self.compute_full_observations()
+        else:
+            raise ValueError(f"Unknown obs_type: {self.cfg.obs_type}")
+        return {"policy": self.torch_obs_buf}
+
+    def _get_rewards(self) -> None:
+        # Clear reduction buffers before launching the reward kernel.
+        # wp.assign(self._num_resets, 0.0)
+        # wp.assign(self._finished_cons_successes, 0.0)
+        self._num_resets.zero_()
+        self._finished_cons_successes.zero_()
+        wp.launch(
+            compute_rewards,
+            dim=self.num_envs,
+            inputs=[
+                self.reset_buf,
+                self.object_pose,
+                self.in_hand_pos,
+                self.goal_rot,
+                self.cfg.dist_reward_scale,
+                self.cfg.rot_reward_scale,
+                self.cfg.rot_eps,
+                self.actions,
+                self.cfg.action_penalty_scale,
+                self.cfg.success_tolerance,
+                self.cfg.reach_goal_bonus,
+                self.cfg.fall_dist,
+                self.cfg.fall_penalty,
+                self.cfg.action_space,
+                self.reset_goal_buf,
+                self.successes,
+                self._num_resets,
+                self._finished_cons_successes,
+                self.rewards,
+            ],
+            device=self.device,
+        )
+
+        # A separate kernel is needed as Warp does not support thread synchronization for reductions.
+        wp.launch(
+            update_consecutive_successes_from_stats,
+            dim=1,
+            inputs=[
+                self._num_resets,
+                self._finished_cons_successes,
+                self.cfg.av_factor,
+                self.consecutive_successes,
+            ],
+            device=self.device,
+        )
+
+        if "log" not in self.extras:
+            self.extras["log"] = dict()
+        # .mean() cannot be called here as it causes problems on stream
+        self.extras["log"]["consecutive_successes"] = wp.to_torch(self.consecutive_successes)
+
+        # Reset goals for envs that reached the target (mask is `reset_goal_buf`).
+        # This avoids Torch-side index extraction and keeps the step graphable.
+        self._reset_target_pose(mask=self.reset_goal_buf)
+
+    def _get_dones(self) -> None:
+        self._compute_intermediate_values()
+
+        wp.launch(
+            get_dones,
+            dim=self.num_envs,
+            inputs=[
+                self.max_episode_length,
+                self.object_pose,
+                self.in_hand_pos,
+                self.goal_rot,
+                self.cfg.fall_dist,
+                self.cfg.success_tolerance,
+                self.cfg.max_consecutive_success,
+                self.successes,
+                self._episode_length_buf_wp,
+                self.reset_terminated,
+                self.reset_time_outs,
+                self.reset_buf,
+            ],
+            device=self.device,
+        )
+
+    def _reset_idx(self, mask: wp.array | None = None):
+        if mask is None:
+            mask = self._ALL_ENV_MASK
+
+        # resets articulation and rigid body attributes
+        super()._reset_idx(mask)
+
+        # reset goals
+        self._reset_target_pose(mask=mask)
+
+        # reset object
+        wp.launch(
+            reset_object,
+            dim=self.num_envs,
+            inputs=[
+                self.object.data.default_root_pose,
+                self.env_origins,
+                self.cfg.reset_position_noise,
+                self.x_unit_vec,
+                self.y_unit_vec,
+                mask,
+                self.rng_state,
+                self.object.data.root_link_pose_w,
+                self.object.data.root_com_vel_w,
+            ],
+            device=self.device,
+        )
+
+        # reset hand
+        wp.launch(
+            reset_hand,
+            dim=self.num_envs,
+            inputs=[
+                self.hand.data.default_joint_pos,
+                self.hand.data.default_joint_vel,
+                self.hand_dof_lower_limits,
+                self.hand_dof_upper_limits,
+                self.cfg.reset_dof_pos_noise,
+                self.cfg.reset_dof_vel_noise,
+                mask,
+                self.num_hand_dofs,
+                self.rng_state,
+                self.hand.data.joint_pos,
+                self.hand.data.joint_vel,
+                self.prev_targets,
+                self.cur_targets,
+                self.hand_dof_targets,
+            ],
+            device=self.device,
+        )
+
+        self.hand.set_joint_position_target_mask(target=self.cur_targets, env_mask=mask)
+
+        wp.launch(
+            reset_successes,
+            dim=self.num_envs,
+            inputs=[
+                mask,
+                self.successes,
+            ],
+            device=self.device,
+        )
+
+        self._compute_intermediate_values()
+
+    def _reset_target_pose(self, env_ids: Sequence[int] | None = None, mask: wp.array | None = None):
+        # reset goal rotation
+        if mask is None:
+            if env_ids is None:
+                return
+            env_mask_list = [False] * self.num_envs
+            for env_id in env_ids:
+                env_mask_list[int(env_id)] = True
+            mask = wp.array(env_mask_list, dtype=wp.bool, device=self.device)
+
+        # update goal pose and markers
+        wp.launch(
+            reset_target_pose,
+            dim=self.num_envs,
+            inputs=[
+                mask,
+                self.x_unit_vec,
+                self.y_unit_vec,
+                self.env_origins,
+                self.goal_pos,
+                self.rng_state,
+                self.goal_rot,
+                self.reset_goal_buf,
+                self.goal_pos_w,
+            ],
+            device=self.device,
+        )
+
+    def _post_step_visualize(self) -> None:
+        """Update goal markers outside CUDA graph scope."""
+        self.goal_markers.visualize(wp.to_torch(self.goal_pos_w), wp.to_torch(self.goal_rot))
+
+    def _compute_intermediate_values(self):
+        # data for hand/object (Warp version of the Torch env's `_compute_intermediate_values`)
+        wp.launch(
+            compute_intermediate_values,
+            dim=self.num_envs,
+            inputs=[
+                self.hand.data.body_pos_w,
+                self.hand.data.body_quat_w,
+                self.hand.data.body_vel_w,
+                self.finger_bodies,
+                self.env_origins,
+                self.object.data.root_link_pose_w,
+                self.object.data.root_com_vel_w,
+                self.num_fingertips,
+                self.fingertip_pos,
+                self.fingertip_rot,
+                self.fingertip_velocities,
+                self.object_pose,
+                self.object_vels,
+            ],
+            device=self.device,
+        )
+
+    def compute_reduced_observations(self):
+        # Per https://arxiv.org/pdf/1808.00177.pdf Table 2
+        #   Fingertip positions
+        #   Object Position, but not orientation
+        #   Relative target orientation
+        wp.launch(
+            compute_reduced_observations,
+            dim=self.num_envs,
+            inputs=[
+                self.fingertip_pos,
+                self.object_pose,
+                self.goal_rot,
+                self.actions,
+                self.num_fingertips,
+                self.cfg.action_space,
+                self.observations,
+            ],
+            device=self.device,
+        )
+        # Warp-native non-finite sanitization + print-once.
+        wp.launch(
+            sanitize_and_print_once,
+            dim=(self.num_envs * self.cfg.observation_space),
+            inputs=[self.observations.flatten(), self.obs_nonfinite_flag],
+            device=self.device,
+        )
+        self.obs_nonfinite_flag.zero_()
+
+    def compute_full_observations(self):
+        wp.launch(
+            compute_full_observations,
+            dim=self.num_envs,
+            inputs=[
+                self.hand.data.joint_pos,
+                self.hand.data.joint_vel,
+                self.hand_dof_lower_limits,
+                self.hand_dof_upper_limits,
+                self.cfg.vel_obs_scale,
+                self.object_pose,
+                self.object_vels,
+                self.in_hand_pos,
+                self.goal_rot,
+                self.fingertip_pos,
+                self.fingertip_rot,
+                self.fingertip_velocities,
+                self.actions,
+                self.num_hand_dofs,
+                self.num_fingertips,
+                self.cfg.action_space,
+                self.observations,
+            ],
+            device=self.device,
+        )
+        # Warp-native non-finite sanitization + print-once.
+        wp.launch(
+            sanitize_and_print_once,
+            dim=(self.num_envs * self.cfg.observation_space),
+            inputs=[self.observations.flatten(), self.obs_nonfinite_flag],
+            device=self.device,
+        )
+        self.obs_nonfinite_flag.zero_()
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/__init__.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/__init__.py
new file mode 100644
index 000000000000..460a30569089
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/locomotion_env_warp.py b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/locomotion_env_warp.py
new file mode 100644
index 000000000000..6a726eb035ef
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/isaaclab_tasks_experimental/direct/locomotion/locomotion_env_warp.py
@@ -0,0 +1,575 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from __future__ import annotations
+
+import warp as wp
+from isaaclab_experimental.envs import DirectRLEnvWarp
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import Articulation
+from isaaclab.envs import DirectRLEnvCfg
+
+
+@wp.func
+def safe_normalize(v: wp.vec3f) -> wp.vec3f:
+    """Normalize a vector with epsilon to avoid NaN on zero-length vectors."""
+    length = wp.max(wp.length(v), 1e-9)
+    return v / length
+
+
+@wp.func
+def fmod(x: wp.float32, y: wp.float32) -> wp.float32:
+    return x - y * wp.floor(x / y)
+
+
+@wp.func
+def euler_from_quat(q: wp.quatf) -> wp.vec3f:
+    sinr_cosp = 2.0 * (q[3] * q[0] + q[1] * q[2])
+    cosr_cosp = q[3] * q[3] - q[0] * q[0] - q[1] * q[1] + q[2] * q[2]
+    sinp = 2.0 * (q[3] * q[1] - q[2] * q[0])
+    siny_cosp = 2.0 * (q[3] * q[2] + q[0] * q[1])
+    cosy_cosp = q[3] * q[3] + q[0] * q[0] - q[1] * q[1] - q[2] * q[2]
+    roll = wp.atan2(sinr_cosp, cosr_cosp)
+    if wp.abs(sinp) >= 1:
+        pitch = wp.sign(sinp) * wp.pi / 2.0
+    else:
+        pitch = wp.asin(sinp)
+    yaw = wp.atan2(siny_cosp, cosy_cosp)
+
+    return wp.vec3f(
+        fmod(roll, wp.static(2.0 * wp.pi)),
+        fmod(pitch, wp.static(2.0 * wp.pi)),
+        fmod(yaw, wp.static(2.0 * wp.pi)),
+    )
+
+
+@wp.kernel
+def get_dones(
+    episode_length_buf: wp.array(dtype=wp.int32),
+    torso_pose: wp.array(dtype=wp.transformf),
+    max_episode_length: wp.int32,
+    termination_height: wp.float32,
+    out_of_bounds: wp.array(dtype=wp.bool),
+    time_out: wp.array(dtype=wp.bool),
+    reset: wp.array(dtype=wp.bool),
+):
+    env_index = wp.tid()
+    out_of_bounds[env_index] = wp.abs(torso_pose[env_index][2]) < termination_height
+    time_out[env_index] = episode_length_buf[env_index] >= (max_episode_length - 1)
+    reset[env_index] = out_of_bounds[env_index] or time_out[env_index]
+
+
+@wp.kernel
+def observations(
+    torso_pose: wp.array(dtype=wp.transformf),
+    velocity: wp.array(dtype=wp.spatial_vectorf),
+    rpy: wp.array(dtype=wp.vec3f),
+    angle_to_target: wp.array(dtype=wp.float32),
+    up_proj: wp.array(dtype=wp.float32),
+    heading_proj: wp.array(dtype=wp.float32),
+    dof_pos_scaled: wp.array2d(dtype=wp.float32),
+    dof_vel: wp.array2d(dtype=wp.float32),
+    actions: wp.array2d(dtype=wp.float32),
+    observations: wp.array2d(dtype=wp.float32),
+    dof_vel_scale: wp.float32,
+    angular_velocity_scale: wp.float32,
+    num_dof: wp.int32,
+):
+    env_index = wp.tid()
+    observations[env_index, 0] = torso_pose[env_index][2]
+    observations[env_index, 1] = velocity[env_index][0]
+    observations[env_index, 2] = velocity[env_index][1]
+    observations[env_index, 3] = velocity[env_index][2]
+    observations[env_index, 4] = velocity[env_index][3] * angular_velocity_scale
+    observations[env_index, 5] = velocity[env_index][4] * angular_velocity_scale
+    observations[env_index, 6] = velocity[env_index][5] * angular_velocity_scale
+    observations[env_index, 7] = wp.atan2(wp.sin(rpy[env_index][2]), wp.cos(rpy[env_index][2]))
+    observations[env_index, 8] = wp.atan2(wp.sin(rpy[env_index][0]), wp.cos(rpy[env_index][0]))
+    observations[env_index, 9] = wp.atan2(wp.sin(angle_to_target[env_index]), wp.cos(angle_to_target[env_index]))
+    observations[env_index, 10] = up_proj[env_index]
+    observations[env_index, 11] = heading_proj[env_index]
+
+    offset_1 = 12 + num_dof
+    offset_2 = offset_1 + num_dof
+
+    for i in range(num_dof):
+        observations[env_index, 12 + i] = dof_pos_scaled[env_index, i]
+    for i in range(num_dof):
+        observations[env_index, offset_1 + i] = dof_vel[env_index, i] * dof_vel_scale
+    for i in range(num_dof):
+        observations[env_index, offset_2 + i] = actions[env_index, i]
+
+
+@wp.func
+def translate_transform(
+    transform: wp.transformf,
+    translation: wp.vec3f,
+) -> wp.transformf:
+    return wp.transform(
+        wp.transform_get_translation(transform) + translation,
+        wp.transform_get_rotation(transform),
+    )
+
+
+@wp.kernel
+def reset_root(
+    default_root_pose: wp.array(dtype=wp.transformf),
+    default_root_vel: wp.array(dtype=wp.spatial_vectorf),
+    env_origins: wp.array(dtype=wp.vec3f),
+    dt: wp.float32,
+    targets: wp.array(dtype=wp.vec3f),
+    to_targets: wp.array(dtype=wp.vec3f),
+    potentials: wp.array(dtype=wp.float32),
+    root_pose: wp.array(dtype=wp.transformf),
+    root_vel: wp.array(dtype=wp.spatial_vectorf),
+    env_mask: wp.array(dtype=wp.bool),
+):
+    env_index = wp.tid()
+    if env_mask[env_index]:
+        root_pose[env_index] = default_root_pose[env_index]
+        root_pose[env_index] = translate_transform(root_pose[env_index], env_origins[env_index])
+        root_vel[env_index] = default_root_vel[env_index]
+        to_targets[env_index] = targets[env_index] - wp.transform_get_translation(root_pose[env_index])
+        to_targets[env_index][2] = 0.0
+        potentials[env_index] = -wp.length(to_targets[env_index]) / dt
+
+
+@wp.kernel
+def reset_joints(
+    default_joint_pos: wp.array2d(dtype=wp.float32),
+    default_joint_vel: wp.array2d(dtype=wp.float32),
+    joint_pos: wp.array2d(dtype=wp.float32),
+    joint_vel: wp.array2d(dtype=wp.float32),
+    env_mask: wp.array(dtype=wp.bool),
+):
+    env_index, joint_index = wp.tid()
+    if env_mask[env_index]:
+        joint_pos[env_index, joint_index] = default_joint_pos[env_index, joint_index]
+        joint_vel[env_index, joint_index] = default_joint_vel[env_index, joint_index]
+
+
+@wp.func
+def heading_reward(
+    heading_proj: wp.float32,
+    heading_weight: wp.float32,
+) -> wp.float32:
+    if heading_proj > 0.8:
+        return heading_weight
+    else:
+        return heading_weight * heading_proj / 0.8
+
+
+@wp.func
+def up_reward(
+    up_proj: wp.float32,
+    up_weight: wp.float32,
+) -> wp.float32:
+    if up_proj > 0.93:
+        return up_weight
+    else:
+        return 0.0
+
+
+@wp.func
+def progress_reward(
+    current_value: wp.float32,
+    prev_value: wp.float32,
+) -> wp.float32:
+    return current_value - prev_value
+
+
+@wp.func
+def actions_cost(
+    actions: wp.array(dtype=wp.float32),
+) -> wp.float32:
+    sum_ = wp.float32(0.0)
+    for i in range(len(actions)):
+        sum_ += actions[i] * actions[i]
+    return sum_
+
+
+@wp.func
+def electricity_cost(
+    actions: wp.array(dtype=wp.float32),
+    dof_vel: wp.array(dtype=wp.float32),
+    dof_vel_scale: wp.float32,
+    motor_effort_ratio: wp.array(dtype=wp.float32),
+) -> wp.float32:
+    sum_ = wp.float32(0.0)
+    for i in range(len(actions)):
+        sum_ += wp.abs(actions[i] * dof_vel[i] * dof_vel_scale) * motor_effort_ratio[i]
+    return sum_
+
+
+@wp.func
+def dof_at_limit_cost(
+    dof_pos_scaled: wp.array(dtype=wp.float32),
+) -> wp.float32:
+    sum_ = wp.float32(0.0)
+    for i in range(len(dof_pos_scaled)):
+        if dof_pos_scaled[i] > 0.98:
+            sum_ += 1.0
+    return sum_
+
+
+@wp.kernel
+def compute_rewards(
+    actions: wp.array2d(dtype=wp.float32),
+    dof_vel: wp.array2d(dtype=wp.float32),
+    dof_pos_scaled: wp.array2d(dtype=wp.float32),
+    reset_terminated: wp.array(dtype=wp.bool),
+    heading_proj: wp.array(dtype=wp.float32),
+    up_proj: wp.array(dtype=wp.float32),
+    potentials: wp.array(dtype=wp.float32),
+    prev_potentials: wp.array(dtype=wp.float32),
+    motor_effort_ratio: wp.array(dtype=wp.float32),
+    up_weight: wp.float32,
+    heading_weight: wp.float32,
+    actions_cost_scale: wp.float32,
+    energy_cost_scale: wp.float32,
+    dof_vel_scale: wp.float32,
+    death_cost: wp.float32,
+    alive_reward_scale: wp.float32,
+    reward: wp.array(dtype=wp.float32),
+):
+    env_index = wp.tid()
+    if reset_terminated[env_index]:
+        reward[env_index] = death_cost
+    else:
+        reward[env_index] = (
+            progress_reward(potentials[env_index], prev_potentials[env_index])
+            + alive_reward_scale
+            + up_reward(up_proj[env_index], up_weight)
+            + heading_reward(heading_proj[env_index], heading_weight)
+            - actions_cost_scale * actions_cost(actions[env_index])
+            - energy_cost_scale
+            * electricity_cost(actions[env_index], dof_vel[env_index], dof_vel_scale, motor_effort_ratio)
+            - dof_at_limit_cost(dof_pos_scaled[env_index])
+        )
+
+
+@wp.kernel
+def compute_heading_and_up(
+    torso_pose: wp.array(dtype=wp.transformf),
+    targets: wp.array(dtype=wp.vec3f),
+    dt: wp.float32,
+    to_targets: wp.array(dtype=wp.vec3f),
+    up_proj: wp.array(dtype=wp.float32),
+    heading_proj: wp.array(dtype=wp.float32),
+    up_vec: wp.array(dtype=wp.vec3f),
+    heading_vec: wp.array(dtype=wp.vec3f),
+    potentials: wp.array(dtype=wp.float32),
+    prev_potentials: wp.array(dtype=wp.float32),
+):
+    env_index = wp.tid()
+    up_vec[env_index] = wp.quat_rotate(wp.transform_get_rotation(torso_pose[env_index]), wp.static(wp.vec3f(0, 0, 1)))
+    heading_vec[env_index] = wp.quat_rotate(
+        wp.transform_get_rotation(torso_pose[env_index]), wp.static(wp.vec3f(1, 0, 0))
+    )
+    up_proj[env_index] = up_vec[env_index][2]
+    to_targets[env_index] = targets[env_index] - wp.transform_get_translation(torso_pose[env_index])
+    to_targets[env_index][2] = 0.0
+    heading_proj[env_index] = wp.dot(heading_vec[env_index], safe_normalize(to_targets[env_index]))
+    prev_potentials[env_index] = potentials[env_index]
+    potentials[env_index] = -wp.length(to_targets[env_index]) / dt
+
+
+@wp.func
+def spatial_rotate_inv(quat: wp.quatf, vec: wp.spatial_vectorf) -> wp.spatial_vectorf:
+    return wp.spatial_vector(
+        wp.quat_rotate_inv(quat, wp.spatial_top(vec)),
+        wp.quat_rotate_inv(quat, wp.spatial_bottom(vec)),
+    )
+
+
+@wp.func
+def unscale(x: wp.float32, lower: wp.float32, upper: wp.float32) -> wp.float32:
+    return (2.0 * x - upper - lower) / (upper - lower)
+
+
+@wp.kernel
+def compute_rot(
+    torso_pose: wp.array(dtype=wp.transformf),
+    velocity: wp.array(dtype=wp.spatial_vectorf),
+    targets: wp.array(dtype=wp.vec3f),
+    vec_loc: wp.array(dtype=wp.spatial_vectorf),
+    rpy: wp.array(dtype=wp.vec3f),
+    angle_to_target: wp.array(dtype=wp.float32),
+):
+    env_index = wp.tid()
+    vec_loc[env_index] = spatial_rotate_inv(wp.transform_get_rotation(torso_pose[env_index]), velocity[env_index])
+    rpy[env_index] = euler_from_quat(wp.transform_get_rotation(torso_pose[env_index]))
+    angle_to_target[env_index] = (
+        wp.atan2(targets[env_index][1] - torso_pose[env_index][1], targets[env_index][0] - torso_pose[env_index][0])
+        - rpy[env_index][2]
+    )
+
+
+@wp.kernel
+def scale_dof_pos(
+    dof_pos: wp.array2d(dtype=wp.float32),
+    dof_limits: wp.array2d(dtype=wp.vec2f),
+    dof_pos_scaled: wp.array2d(dtype=wp.float32),
+):
+    env_index, joint_index = wp.tid()
+    dof_pos_scaled[env_index, joint_index] = unscale(
+        dof_pos[env_index, joint_index], dof_limits[env_index, joint_index][0], dof_limits[env_index, joint_index][1]
+    )
+
+
+@wp.kernel
+def update_actions(
+    input_actions: wp.array2d(dtype=wp.float32),
+    actions: wp.array2d(dtype=wp.float32),
+    joint_gears: wp.array(dtype=wp.float32),
+    action_scale: wp.float32,
+):
+    env_index, joint_index = wp.tid()
+    actions[env_index, joint_index] = (
+        action_scale * joint_gears[joint_index] * wp.clamp(input_actions[env_index, joint_index], -1.0, 1.0)
+    )
+
+
+@wp.kernel
+def initialize_state(
+    env_origins: wp.array(dtype=wp.vec3f),
+    targets: wp.array(dtype=wp.vec3f),
+    state: wp.array(dtype=wp.uint32),
+    seed: wp.int32,
+):
+    env_index = wp.tid()
+    state[env_index] = wp.rand_init(seed, env_index)
+    targets[env_index] = env_origins[env_index]
+    targets[env_index] += wp.static(wp.vec3f(1000.0, 0.0, 0.0))
+
+
+class LocomotionWarpEnv(DirectRLEnvWarp):
+    cfg: DirectRLEnvCfg
+
+    def __init__(self, cfg: DirectRLEnvCfg, render_mode: str | None = None, **kwargs):
+        super().__init__(cfg, render_mode, **kwargs)
+
+        self.action_scale = self.cfg.action_scale
+        self.joint_gears = wp.array(self.cfg.joint_gears, dtype=wp.float32, device=self.sim.device)
+        self.motor_effort_ratio = wp.ones_like(self.joint_gears, device=self.sim.device)
+        self._joint_dof_idx, _ = self.robot.find_joints(".*")
+
+        # Simulation bindings
+        # Note: these are direct memory views into the Newton simulation data, they should not be modified directly
+        self.joint_pos = self.robot.data.joint_pos
+        self.joint_vel = self.robot.data.joint_vel
+        self.root_pose_w = self.robot.data.root_pose_w
+        self.root_vel_w = self.robot.data.root_vel_w
+        self.soft_joint_pos_limits = self.robot.data.soft_joint_pos_limits
+
+        # Buffers
+        self.observations = wp.zeros(
+            (self.num_envs, self.cfg.observation_space), dtype=wp.float32, device=self.sim.device
+        )
+        self.rewards = wp.zeros((self.num_envs), dtype=wp.float32, device=self.sim.device)
+        self.actions = wp.zeros((self.num_envs, self.robot.num_joints), dtype=wp.float32, device=self.sim.device)
+        self.states = wp.zeros((self.num_envs), dtype=wp.uint32, device=self.sim.device)
+        self.potentials = wp.zeros(self.num_envs, dtype=wp.float32, device=self.sim.device)
+        self.prev_potentials = wp.zeros_like(self.potentials)
+        self.targets = wp.zeros((self.num_envs), dtype=wp.vec3f, device=self.sim.device)
+        self.up_vec = wp.zeros((self.num_envs), dtype=wp.vec3f, device=self.sim.device)
+        self.heading_vec = wp.zeros((self.num_envs), dtype=wp.vec3f, device=self.sim.device)
+        self.to_targets = wp.zeros((self.num_envs), dtype=wp.vec3f, device=self.sim.device)
+        self.up_proj = wp.zeros((self.num_envs), dtype=wp.float32, device=self.sim.device)
+        self.heading_proj = wp.zeros((self.num_envs), dtype=wp.float32, device=self.sim.device)
+        self.vec_loc = wp.zeros((self.num_envs), dtype=wp.spatial_vectorf, device=self.sim.device)
+        self.rpy = wp.zeros((self.num_envs), dtype=wp.vec3f, device=self.sim.device)
+        self.angle_to_target = wp.zeros((self.num_envs), dtype=wp.float32, device=self.sim.device)
+        self.dof_pos_scaled = wp.zeros((self.num_envs, self.robot.num_joints), dtype=wp.float32, device=self.sim.device)
+        self.env_origins = wp.from_torch(self.scene.env_origins, dtype=wp.vec3f)
+        self.actions_mapped = wp.zeros((self.num_envs, self.robot.num_joints), dtype=wp.float32, device=self.sim.device)
+
+        # Initial states and targets
+        if self.cfg.seed is None:
+            self.cfg.seed = -1
+
+        wp.launch(
+            initialize_state,
+            dim=self.num_envs,
+            inputs=[
+                self.env_origins,
+                self.targets,
+                self.states,
+                self.cfg.seed,
+            ],
+        )
+
+        # Bind torch buffers to warp buffers
+        self.torch_obs_buf = wp.to_torch(self.observations)
+        self.torch_reward_buf = wp.to_torch(self.rewards)
+        self.torch_reset_terminated = wp.to_torch(self.reset_terminated)
+        self.torch_reset_time_outs = wp.to_torch(self.reset_time_outs)
+        self.torch_episode_length_buf = self.episode_length_buf  # already a torch tensor via wp.to_torch
+
+    def _setup_scene(self) -> None:
+        self.robot = Articulation(self.cfg.robot)
+        # add ground plane
+        self.cfg.terrain.num_envs = self.scene.cfg.num_envs
+        self.cfg.terrain.env_spacing = self.scene.cfg.env_spacing
+        self.terrain = self.cfg.terrain.class_type(self.cfg.terrain)
+        # clone and replicate
+        self.scene.clone_environments(copy_from_source=False)
+        # add articulation to scene
+        self.scene.articulations["robot"] = self.robot
+        # add lights
+        light_cfg = sim_utils.DomeLightCfg(intensity=2000.0, color=(0.75, 0.75, 0.75))
+        light_cfg.func("/World/Light", light_cfg)
+
+    def _pre_physics_step(self, actions: wp.array) -> None:
+        self.actions.assign(actions)
+        wp.launch(
+            update_actions,
+            dim=(self.num_envs, self.robot.num_joints),
+            inputs=[actions, self.actions_mapped, self.joint_gears, self.action_scale],
+        )
+
+    def _apply_action(self) -> None:
+        self.robot.set_joint_effort_target_mask(target=self.actions_mapped)
+
+    def _compute_intermediate_values(self) -> None:
+        wp.launch(
+            compute_heading_and_up,
+            dim=self.num_envs,
+            inputs=[
+                self.root_pose_w,
+                self.targets,
+                self.cfg.sim.dt,
+                self.to_targets,
+                self.up_proj,
+                self.heading_proj,
+                self.up_vec,
+                self.heading_vec,
+                self.potentials,
+                self.prev_potentials,
+            ],
+        )
+
+        wp.launch(
+            compute_rot,
+            dim=self.num_envs,
+            inputs=[
+                self.root_pose_w,
+                self.root_vel_w,
+                self.targets,
+                self.vec_loc,
+                self.rpy,
+                self.angle_to_target,
+            ],
+        )
+        wp.launch(
+            scale_dof_pos,
+            dim=(self.num_envs, self.robot.num_joints),
+            inputs=[
+                self.joint_pos,
+                self.soft_joint_pos_limits,
+                self.dof_pos_scaled,
+            ],
+        )
+
+    def _get_observations(self) -> dict:
+        wp.launch(
+            observations,
+            dim=self.num_envs,
+            inputs=[
+                self.root_pose_w,
+                self.vec_loc,
+                self.rpy,
+                self.angle_to_target,
+                self.up_proj,
+                self.heading_proj,
+                self.dof_pos_scaled,
+                self.joint_vel,
+                self.actions,
+                self.observations,
+                self.cfg.dof_vel_scale,
+                self.cfg.angular_velocity_scale,
+                self.robot.num_joints,
+            ],
+        )
+        return {"policy": self.torch_obs_buf}
+
+    def _get_rewards(self) -> None:
+        wp.launch(
+            compute_rewards,
+            dim=self.num_envs,
+            inputs=[
+                self.actions,
+                self.joint_vel,
+                self.dof_pos_scaled,
+                self.reset_terminated,
+                self.heading_proj,
+                self.up_proj,
+                self.potentials,
+                self.prev_potentials,
+                self.motor_effort_ratio,
+                self.cfg.up_weight,
+                self.cfg.heading_weight,
+                self.cfg.actions_cost_scale,
+                self.cfg.energy_cost_scale,
+                self.cfg.dof_vel_scale,
+                self.cfg.death_cost,
+                self.cfg.alive_reward_scale,
+                self.rewards,
+            ],
+        )
+
+    def _get_dones(self) -> None:
+        self._compute_intermediate_values()
+
+        wp.launch(
+            get_dones,
+            dim=self.num_envs,
+            inputs=[
+                self._episode_length_buf_wp,
+                self.root_pose_w,
+                self.max_episode_length,
+                self.cfg.termination_height,
+                self.reset_terminated,
+                self.reset_time_outs,
+                self.reset_buf,
+            ],
+        )
+
+    def _reset_idx(self, mask: wp.array | None = None):
+        if mask is None:
+            mask = self._ALL_ENV_MASK
+
+        super()._reset_idx(mask)
+
+        wp.launch(
+            reset_root,
+            dim=self.num_envs,
+            inputs=[
+                self.robot.data.default_root_pose,
+                self.robot.data.default_root_vel,
+                self.env_origins,
+                self.cfg.sim.dt,
+                self.targets,
+                self.to_targets,
+                self.potentials,
+                self.root_pose_w,
+                self.root_vel_w,
+                mask,
+            ],
+        )
+        wp.launch(
+            reset_joints,
+            dim=(self.num_envs, self.robot.num_joints),
+            inputs=[
+                self.robot.data.default_joint_pos,
+                self.robot.data.default_joint_vel,
+                self.joint_pos,
+                self.joint_vel,
+                mask,
+            ],
+        )
+
+        self._compute_intermediate_values()
diff --git a/source/isaaclab_tasks_experimental/pyproject.toml b/source/isaaclab_tasks_experimental/pyproject.toml
new file mode 100644
index 000000000000..d90ac3536f16
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools", "wheel", "toml"]
+build-backend = "setuptools.build_meta"
diff --git a/source/isaaclab_tasks_experimental/setup.py b/source/isaaclab_tasks_experimental/setup.py
new file mode 100644
index 000000000000..ae77016f8984
--- /dev/null
+++ b/source/isaaclab_tasks_experimental/setup.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Installation script for the 'isaaclab_tasks_experimental' python package."""
+
+import os
+
+import toml
+from setuptools import find_packages, setup
+
+# Obtain the extension data from the extension.toml file
+EXTENSION_PATH = os.path.dirname(os.path.realpath(__file__))
+# Read the extension.toml file
+EXTENSION_TOML_DATA = toml.load(os.path.join(EXTENSION_PATH, "config", "extension.toml"))
+
+# Installation operation
+setup(
+    name="isaaclab_tasks_experimental",
+    author="Isaac Lab Project Developers",
+    maintainer="Isaac Lab Project Developers",
+    url=EXTENSION_TOML_DATA["package"]["repository"],
+    version=EXTENSION_TOML_DATA["package"]["version"],
+    description=EXTENSION_TOML_DATA["package"]["description"],
+    keywords=EXTENSION_TOML_DATA["package"]["keywords"],
+    include_package_data=True,
+    python_requires=">=3.10",
+    install_requires=["isaaclab_tasks"],
+    packages=find_packages(),
+    classifiers=[
+        "Natural Language :: English",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
+        "Isaac Sim :: 5.0.0",
+        "Isaac Sim :: 5.1.0",
+        "Isaac Sim :: 6.0.0",
+    ],
+    zip_safe=False,
+)

From 7eaaa3f85abe781c3fd8a26fcc7145af6ffafcf0 Mon Sep 17 00:00:00 2001
From: Antoine RICHARD <antoiner@nvidia.com>
Date: Fri, 13 Mar 2026 13:24:56 +0100
Subject: [PATCH 2/2] Update source/isaaclab_experimental/setup.py

Signed-off-by: Antoine RICHARD <antoiner@nvidia.com>
---
 source/isaaclab_experimental/setup.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/source/isaaclab_experimental/setup.py b/source/isaaclab_experimental/setup.py
index d44ab67d789e..f999fcd026f9 100644
--- a/source/isaaclab_experimental/setup.py
+++ b/source/isaaclab_experimental/setup.py
@@ -35,10 +35,11 @@
     install_requires=INSTALL_REQUIRES,
     packages=find_packages(),
     classifiers=[
-        "Natural Language :: English",
-        "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
         "Isaac Sim :: 5.0.0",
+        "Isaac Sim :: 5.1.0",
+        "Isaac Sim :: 6.0.0",
     ],
     zip_safe=False,
 )