From 067a955c1a1cdec243f7e4667ad591712a82936f Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Thu, 7 May 2026 20:24:34 +0000
Subject: [PATCH 1/3] Updated Camera, CameraData and Renderer to use Warp
 instead of Torch

---
 scripts/demos/sensors/cameras.py              |  17 +-
 .../tutorials/04_sensors/run_usd_camera.py    |  12 +-
 .../camera-data-proxyarray.minor.rst          |  38 +++++
 .../isaaclab/envs/mdp/observations.py         |   6 +-
 .../isaaclab/renderers/base_renderer.py       |  17 +-
 .../isaaclab/sensors/camera/camera.py         | 139 +++++++++++-----
 .../isaaclab/sensors/camera/camera_data.py    |  57 +++++--
 .../multi_mesh_ray_caster_camera.py           |  41 +++--
 .../multi_mesh_ray_caster_camera_data.py      |   9 +-
 .../sensors/ray_caster/ray_caster_camera.py   | 151 ++++++++++++------
 .../renderers/test_camera_output_contract.py  |  41 +++--
 .../tacsl_sensor/visuotactile_sensor.py       |   8 +-
 .../changelog.d/camera-data-proxyarray.skip   |   2 +
 .../renderers/newton_warp_renderer.py         |  58 ++++---
 .../changelog.d/camera-data-proxyarray.skip   |   2 +
 .../isaaclab_ov/renderers/ovrtx_renderer.py   |  42 ++---
 .../camera-data-proxyarray.minor.rst          |  13 ++
 .../renderers/isaac_rtx_renderer.py           |  27 ++--
 .../direct/cartpole/cartpole_camera_env.py    |  14 +-
 .../shadow_hand/shadow_hand_vision_env.py     |   5 +-
 .../manipulation/dexsuite/mdp/observations.py |   3 +-
 21 files changed, 475 insertions(+), 227 deletions(-)
 create mode 100644 source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst
 create mode 100644 source/isaaclab_newton/changelog.d/camera-data-proxyarray.skip
 create mode 100644 source/isaaclab_ov/changelog.d/camera-data-proxyarray.skip
 create mode 100644 source/isaaclab_physx/changelog.d/camera-data-proxyarray.minor.rst

diff --git a/scripts/demos/sensors/cameras.py b/scripts/demos/sensors/cameras.py
index 400881ac5d62..970bd7841364 100644
--- a/scripts/demos/sensors/cameras.py
+++ b/scripts/demos/sensors/cameras.py
@@ -239,8 +239,11 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene):
         # save every 10th image (for visualization purposes only)
         # note: saving images will slow down the simulation
         if count % 10 == 0:
-            # compare generated RGB images across different cameras
-            rgb_images = [scene["camera"].data.output["rgb"][0, ..., :3], scene["tiled_camera"].data.output["rgb"][0]]
+            # compare generated RGB images across different cameras (use ProxyArray.torch)
+            rgb_images = [
+                scene["camera"].data.output["rgb"].torch[0, ..., :3],
+                scene["tiled_camera"].data.output["rgb"].torch[0],
+            ]
             save_images_grid(
                 rgb_images,
                 subtitles=["Camera"],
@@ -250,9 +253,9 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene):
 
             # compare generated Depth images across different cameras
             depth_images = [
-                scene["camera"].data.output["distance_to_image_plane"][0],
-                scene["tiled_camera"].data.output["distance_to_image_plane"][0, ..., 0],
-                scene["raycast_camera"].data.output["distance_to_image_plane"][0],
+                scene["camera"].data.output["distance_to_image_plane"].torch[0],
+                scene["tiled_camera"].data.output["distance_to_image_plane"].torch[0, ..., 0],
+                scene["raycast_camera"].data.output["distance_to_image_plane"].torch[0],
             ]
             save_images_grid(
                 depth_images,
@@ -263,7 +266,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene):
             )
 
             # save all tiled RGB images
-            tiled_images = scene["tiled_camera"].data.output["rgb"]
+            tiled_images = scene["tiled_camera"].data.output["rgb"].torch
             save_images_grid(
                 tiled_images,
                 subtitles=[f"Cam{i}" for i in range(tiled_images.shape[0])],
@@ -272,7 +275,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene):
             )
 
             # save all camera RGB images
-            cam_images = scene["camera"].data.output["rgb"][..., :3]
+            cam_images = scene["camera"].data.output["rgb"].torch[..., :3]
             save_images_grid(
                 cam_images,
                 subtitles=[f"Cam{i}" for i in range(cam_images.shape[0])],
diff --git a/scripts/tutorials/04_sensors/run_usd_camera.py b/scripts/tutorials/04_sensors/run_usd_camera.py
index 5c041d737c5d..2a9626810238 100644
--- a/scripts/tutorials/04_sensors/run_usd_camera.py
+++ b/scripts/tutorials/04_sensors/run_usd_camera.py
@@ -232,7 +232,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict):
             # Save images from camera at camera_index
             # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
             single_cam_data = convert_dict_to_backend(
-                {k: v[camera_index] for k, v in camera.data.output.items()}, backend="numpy"
+                {k: v.torch[camera_index] for k, v in camera.data.output.items()}, backend="numpy"
             )
 
             # Pack data back into replicator format to save them using its writer
@@ -245,7 +245,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict):
                     rep_output["annotators"][key] = {"render_product": {"data": data}}
             # Save images
             # Note: We need to provide On-time data for Replicator to save the images.
-            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
+            rep_output["trigger_outputs"] = {"on_time": camera.frame.torch[camera_index]}
             rep_writer.write(rep_output)
 
         # Draw pointcloud if there is a GUI and --draw has been passed
@@ -256,10 +256,10 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict):
         ):
             # Derive pointcloud from camera at camera_index
             pointcloud = create_pointcloud_from_depth(
-                intrinsic_matrix=camera.data.intrinsic_matrices[camera_index],
-                depth=camera.data.output["distance_to_image_plane"][camera_index],
-                position=camera.data.pos_w[camera_index],
-                orientation=camera.data.quat_w_ros[camera_index],
+                intrinsic_matrix=camera.data.intrinsic_matrices.torch[camera_index],
+                depth=camera.data.output["distance_to_image_plane"].torch[camera_index],
+                position=camera.data.pos_w.torch[camera_index],
+                orientation=camera.data.quat_w_ros.torch[camera_index],
                 device=sim.device,
             )
 
diff --git a/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst b/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst
new file mode 100644
index 000000000000..080dff3cf95a
--- /dev/null
+++ b/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst
@@ -0,0 +1,38 @@
+Changed
+^^^^^^^
+
+* **Breaking:** :class:`~isaaclab.sensors.camera.CameraData` now stores its array fields
+  as :class:`~isaaclab.utils.warp.proxy_array.ProxyArray` wrappers around
+  :class:`warp.array` primary buffers. The change covers
+  :attr:`~isaaclab.sensors.camera.CameraData.pos_w`,
+  :attr:`~isaaclab.sensors.camera.CameraData.quat_w_world`,
+  :attr:`~isaaclab.sensors.camera.CameraData.intrinsic_matrices`,
+  every entry in :attr:`~isaaclab.sensors.camera.CameraData.output`, and the
+  derived :attr:`~isaaclab.sensors.camera.CameraData.quat_w_ros` /
+  :attr:`~isaaclab.sensors.camera.CameraData.quat_w_opengl` properties.
+  Migration: read torch tensors via the ``.torch`` accessor (zero-copy) and pass
+  ``.warp`` to warp kernels (e.g. ``camera.data.output["rgb"].torch``,
+  ``camera.data.pos_w.warp``). Existing code that uses the field directly as a
+  tensor still works through :class:`ProxyArray`'s deprecation bridge but emits
+  a one-time :class:`DeprecationWarning`.
+* **Breaking:** :class:`~isaaclab.sensors.ray_caster.MultiMeshRayCasterCameraData`
+  ``image_mesh_ids`` is likewise a :class:`ProxyArray` over a ``wp.int16`` warp
+  buffer; access via ``.torch`` for tensor reads.
+* **Breaking:** :attr:`~isaaclab.sensors.camera.Camera.frame` and
+  :attr:`~isaaclab.sensors.ray_caster.RayCasterCamera.frame` now return a
+  :class:`ProxyArray` over a ``wp.int64`` array instead of a ``torch.Tensor``.
+  Migration: use ``camera.frame.torch`` for the existing tensor view.
+* **Breaking:**
+  :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs` and
+  :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_camera` now type
+  their array parameters as :class:`warp.array` rather than
+  :class:`torch.Tensor`. Renderer subclasses receive the underlying
+  :attr:`ProxyArray.warp` buffers directly from the camera; consumers that
+  implemented :class:`BaseRenderer` must update their signatures.
+* :meth:`~isaaclab.sensors.camera.Camera.set_intrinsic_matrices`,
+  :meth:`~isaaclab.sensors.camera.Camera.set_world_poses`, and
+  :meth:`~isaaclab.sensors.camera.Camera.set_world_poses_from_view` (plus their
+  :class:`~isaaclab.sensors.ray_caster.RayCasterCamera` analogues) now accept
+  ``wp.array | ProxyArray | torch.Tensor | numpy.ndarray`` inputs, with
+  :class:`warp.array` documented as the canonical type. Existing
+  ``torch.Tensor`` callers continue to work without changes.
diff --git a/source/isaaclab/isaaclab/envs/mdp/observations.py b/source/isaaclab/isaaclab/envs/mdp/observations.py
index 8f97da3595dd..adf09116cde2 100644
--- a/source/isaaclab/isaaclab/envs/mdp/observations.py
+++ b/source/isaaclab/isaaclab/envs/mdp/observations.py
@@ -407,12 +407,12 @@ def image(
     # extract the used quantities (to enable type-hinting)
     sensor: Camera | RayCasterCamera = env.scene.sensors[sensor_cfg.name]
 
-    # obtain the input image
-    images = sensor.data.output[data_type]
+    # obtain the input image (CameraData fields are ProxyArray; .torch is a zero-copy view)
+    images = sensor.data.output[data_type].torch
 
     # depth image conversion
     if (data_type == "distance_to_camera") and convert_perspective_to_orthogonal:
-        images = math_utils.orthogonalize_perspective_depth(images, sensor.data.intrinsic_matrices)
+        images = math_utils.orthogonalize_perspective_depth(images, sensor.data.intrinsic_matrices.torch)
 
     # rgb/depth/normals image normalization
     if normalize:
diff --git a/source/isaaclab/isaaclab/renderers/base_renderer.py b/source/isaaclab/isaaclab/renderers/base_renderer.py
index 2fc498eae8e3..604dc6ea8794 100644
--- a/source/isaaclab/isaaclab/renderers/base_renderer.py
+++ b/source/isaaclab/isaaclab/renderers/base_renderer.py
@@ -10,12 +10,12 @@
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any
 
+import warp as wp
+
 from .camera_render_spec import CameraRenderSpec
 from .output_contract import RenderBufferKind, RenderBufferSpec
 
 if TYPE_CHECKING:
-    import torch
-
     from isaaclab.sensors.camera.camera_data import CameraData
 
 
@@ -60,13 +60,16 @@ def create_render_data(self, spec: CameraRenderSpec) -> Any:
         pass
 
     @abstractmethod
-    def set_outputs(self, render_data: Any, output_data: dict[str, torch.Tensor]) -> None:
+    def set_outputs(self, render_data: Any, output_data: dict[str, wp.array]) -> None:
         """Store reference to output buffers for writing during render.
 
         Args:
             render_data: The render data object from :meth:`create_render_data`.
             output_data: Dictionary mapping output names (e.g. ``"rgb"``, ``"depth"``)
-                to pre-allocated tensors where rendered data will be written.
+                to pre-allocated :class:`warp.array` buffers where rendered data
+                will be written. Each buffer has shape ``(num_views, height, width,
+                channels)`` and the per-kind dtype declared by
+                :meth:`supported_output_types`.
         """
         pass
 
@@ -80,14 +83,14 @@ def update_transforms(self) -> None:
 
     @abstractmethod
     def update_camera(
-        self, render_data: Any, positions: torch.Tensor, orientations: torch.Tensor, intrinsics: torch.Tensor
+        self, render_data: Any, positions: wp.array, orientations: wp.array, intrinsics: wp.array
     ) -> None:
         """Update camera poses and intrinsics for the next render.
 
         Args:
             render_data: The render data object from :meth:`create_render_data`.
-            positions: Camera positions in world frame, shape ``(N, 3)``.
-            orientations: Camera orientations as quaternions (x, y, z, w), shape ``(N, 4)``.
+            positions: Camera positions [m] in world frame, shape ``(N, 3)``.
+            orientations: Camera orientations as quaternions ``(x, y, z, w)``, shape ``(N, 4)``.
             intrinsics: Camera intrinsic matrices, shape ``(N, 3, 3)``.
         """
         pass
diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py
index c481002e524e..2ba26c672c73 100644
--- a/source/isaaclab/isaaclab/sensors/camera/camera.py
+++ b/source/isaaclab/isaaclab/sensors/camera/camera.py
@@ -26,6 +26,7 @@
     create_rotation_matrix_from_view,
     quat_from_matrix,
 )
+from isaaclab.utils.warp.proxy_array import ProxyArray
 
 from ..sensor_base import SensorBase
 from .camera_data import CameraData, RenderBufferKind
@@ -170,8 +171,13 @@ def data(self) -> CameraData:
         return self._data
 
     @property
-    def frame(self) -> torch.tensor:
-        """Frame number when the measurement took place."""
+    def frame(self) -> ProxyArray:
+        """Frame number when the measurement took place.
+
+        Shape is (N,), dtype ``wp.int64``. Use ``.torch`` for a zero-copy
+        :class:`torch.Tensor` view or ``.warp`` for the underlying
+        :class:`warp.array`.
+        """
         return self._frame
 
     @property
@@ -184,7 +190,10 @@ def image_shape(self) -> tuple[int, int]:
     """
 
     def set_intrinsic_matrices(
-        self, matrices: torch.Tensor, focal_length: float | None = None, env_ids: Sequence[int] | None = None
+        self,
+        matrices: wp.array | ProxyArray | torch.Tensor | np.ndarray,
+        focal_length: float | None = None,
+        env_ids: Sequence[int] | None = None,
     ):
         """Set parameters of the USD camera from its intrinsic matrix.
 
@@ -203,7 +212,10 @@ def set_intrinsic_matrices(
             is not true in the input intrinsic matrix, then the camera will not set up correctly.
 
         Args:
-            matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3).
+            matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3). A
+                :class:`warp.array` is the canonical input type;
+                :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`,
+                :class:`torch.Tensor`, and :class:`numpy.ndarray` are also accepted.
             focal_length: Perspective focal length (in cm) used to calculate pixel size. Defaults to None. If None,
                 focal_length will be calculated 1 / width.
             env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
@@ -211,7 +223,11 @@ def set_intrinsic_matrices(
         # resolve env_ids
         if env_ids is None:
             env_ids = self._ALL_INDICES
-        # convert matrices to numpy tensors
+        # normalize input: ProxyArray → wp.array → torch.Tensor → numpy
+        if isinstance(matrices, ProxyArray):
+            matrices = matrices.torch
+        elif isinstance(matrices, wp.array):
+            matrices = wp.to_torch(matrices)
         if isinstance(matrices, torch.Tensor):
             matrices = matrices.cpu().numpy()
         else:
@@ -246,8 +262,8 @@ def set_intrinsic_matrices(
 
     def set_world_poses(
         self,
-        positions: torch.Tensor | None = None,
-        orientations: torch.Tensor | None = None,
+        positions: wp.array | ProxyArray | torch.Tensor | np.ndarray | None = None,
+        orientations: wp.array | ProxyArray | torch.Tensor | np.ndarray | None = None,
         env_ids: Sequence[int] | None = None,
         convention: Literal["opengl", "ros", "world"] = "ros",
     ):
@@ -264,10 +280,13 @@ def set_world_poses(
         on the conventions.
 
         Args:
-            positions: The cartesian coordinates (in meters). Shape is (N, 3).
-                Defaults to None, in which case the camera position in not changed.
+            positions: The cartesian coordinates [m]. Shape is (N, 3). A :class:`warp.array`
+                is the canonical input type; :class:`ProxyArray`, :class:`torch.Tensor`,
+                and :class:`numpy.ndarray` are also accepted. Defaults to None, in which
+                case the camera position is not changed.
             orientations: The quaternion orientation in (x, y, z, w). Shape is (N, 4).
-                Defaults to None, in which case the camera orientation in not changed.
+                Same accepted types as ``positions``. Defaults to None, in which case
+                the camera orientation is not changed.
             env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
             convention: The convention in which the poses are fed. Defaults to "ros".
 
@@ -277,18 +296,12 @@ def set_world_poses(
         # resolve env_ids
         if env_ids is None:
             env_ids = self._ALL_INDICES
-        # convert to backend tensor
+        # normalize positions to a torch tensor on the sensor device
         if positions is not None:
-            if isinstance(positions, np.ndarray):
-                positions = torch.from_numpy(positions).to(device=self._device)
-            elif not isinstance(positions, torch.Tensor):
-                positions = torch.tensor(positions, device=self._device)
+            positions = self._to_device_tensor(positions)
         # convert rotation matrix from input convention to OpenGL
         if orientations is not None:
-            if isinstance(orientations, np.ndarray):
-                orientations = torch.from_numpy(orientations).to(device=self._device)
-            elif not isinstance(orientations, torch.Tensor):
-                orientations = torch.tensor(orientations, device=self._device)
+            orientations = self._to_device_tensor(orientations)
             orientations = convert_camera_frame_orientation_convention(orientations, origin=convention, target="opengl")
         # convert torch tensors to warp arrays for the view
         pos_wp = wp.from_torch(positions.contiguous()) if positions is not None else None
@@ -302,13 +315,19 @@ def set_world_poses(
         self._view.set_world_poses(pos_wp, ori_wp, idx_wp)
 
     def set_world_poses_from_view(
-        self, eyes: torch.Tensor, targets: torch.Tensor, env_ids: Sequence[int] | None = None
+        self,
+        eyes: wp.array | ProxyArray | torch.Tensor | np.ndarray,
+        targets: wp.array | ProxyArray | torch.Tensor | np.ndarray,
+        env_ids: Sequence[int] | None = None,
     ):
         """Set the poses of the camera from the eye position and look-at target position.
 
         Args:
-            eyes: The positions of the camera's eye. Shape is (N, 3).
-            targets: The target locations to look at. Shape is (N, 3).
+            eyes: The positions of the camera's eye. Shape is (N, 3). A :class:`warp.array`
+                is the canonical input type; :class:`ProxyArray`, :class:`torch.Tensor`,
+                and :class:`numpy.ndarray` are also accepted.
+            targets: The target locations to look at. Shape is (N, 3). Same accepted types
+                as ``eyes``.
             env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
 
         Raises:
@@ -318,6 +337,9 @@ def set_world_poses_from_view(
         # resolve env_ids
         if env_ids is None:
             env_ids = self._ALL_INDICES
+        # normalize inputs to torch tensors on the sensor device
+        eyes = self._to_device_tensor(eyes)
+        targets = self._to_device_tensor(targets)
         # get up axis of current stage
         up_axis = UsdGeom.GetStageUpAxis(self.stage)
         # set camera poses using the view
@@ -346,8 +368,8 @@ def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None
         # reset the data
         # note: this recomputation is useful if one performs events such as randomizations on the camera poses.
         self._update_poses(env_ids)
-        # Reset the frame count
-        self._frame[env_ids] = 0
+        # Reset the frame count (ProxyArray; mutate via the zero-copy torch view)
+        self._frame.torch[env_ids] = 0
 
     """
     Implementation.
@@ -390,8 +412,10 @@ def _initialize_impl(self):
 
         # Create all env_ids buffer
         self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long)
-        # Create frame count buffer
-        self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long)
+        # Create frame count buffer (wp.array primary, ProxyArray-published).
+        self._frame = ProxyArray(
+            wp.from_torch(torch.zeros(self._view.count, device=self._device, dtype=torch.long).contiguous())
+        )
 
         # Convert all encapsulated prims to Camera
         for cam_prim in self._view.prims:
@@ -427,8 +451,8 @@ def _update_buffers_impl(self, env_mask: wp.array):
         env_ids = wp.to_torch(env_mask).nonzero(as_tuple=False).squeeze(-1)
         if len(env_ids) == 0:
             return
-        # Increment frame count
-        self._frame[env_ids] += 1
+        # Increment frame count (ProxyArray; mutate via the zero-copy torch view)
+        self._frame.torch[env_ids] += 1
         # update latest camera pose if requested
         if self.cfg.update_latest_camera_pose:
             self._update_poses(env_ids)
@@ -451,6 +475,23 @@ def _update_buffers_impl(self, env_mask: wp.array):
     Private Helpers
     """
 
+    def _to_device_tensor(self, value: wp.array | ProxyArray | torch.Tensor | np.ndarray) -> torch.Tensor:
+        """Normalize an array-like input to a ``torch.Tensor`` on this sensor's device.
+
+        Accepts :class:`warp.array`, :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`,
+        :class:`torch.Tensor`, and :class:`numpy.ndarray`. Warp inputs are unwrapped via
+        ``wp.to_torch`` (zero-copy).
+        """
+        if isinstance(value, ProxyArray):
+            value = value.torch
+        elif isinstance(value, wp.array):
+            value = wp.to_torch(value)
+        if isinstance(value, np.ndarray):
+            return torch.from_numpy(value).to(device=self._device)
+        if not isinstance(value, torch.Tensor):
+            return torch.tensor(value, device=self._device)
+        return value.to(device=self._device)
+
     def _check_supported_data_types(self, cfg: CameraCfg):
         """Checks if the data types are supported by the ray-caster camera."""
         # check if there is any intersection in unsupported types
@@ -501,12 +542,22 @@ def _create_buffers(self):
         )
         # Camera-frame state (pose / intrinsics) is owned by the camera, not
         # the renderer: populate it on the freshly constructed ``CameraData``.
-        self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device)
+        # Each field stores a wp.array primary buffer (allocated via wp.from_torch
+        # over a torch.zeros allocation) and is published as a ProxyArray.
+        self._data.intrinsic_matrices = ProxyArray(
+            wp.from_torch(torch.zeros((self._view.count, 3, 3), device=self._device).contiguous())
+        )
         self._update_intrinsic_matrices(self._ALL_INDICES)
-        self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device)
-        self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device)
+        self._data.pos_w = ProxyArray(
+            wp.from_torch(torch.zeros((self._view.count, 3), device=self._device).contiguous())
+        )
+        self._data.quat_w_world = ProxyArray(
+            wp.from_torch(torch.zeros((self._view.count, 4), device=self._device).contiguous())
+        )
         self._update_poses(self._ALL_INDICES)
-        self._renderer.set_outputs(self._render_data, self._data.output)
+        # Hand the renderer the underlying wp.array buffers; the ProxyArray
+        # wrappers in CameraData stay valid since both views share memory.
+        self._renderer.set_outputs(self._render_data, {name: proxy.warp for name, proxy in self._data.output.items()})
 
     def _update_intrinsic_matrices(self, env_ids: Sequence[int]):
         """Compute camera's matrix of intrinsic parameters.
@@ -517,6 +568,9 @@ def _update_intrinsic_matrices(self, env_ids: Sequence[int]):
             The calibration matrix projects points in the 3D scene onto an imaginary screen of the camera.
             The coordinates of points on the image plane are in the homogeneous representation.
         """
+        # zero-copy torch view of the wp.array buffer; in-place mutation is
+        # visible through both views.
+        intrinsic_view = self._data.intrinsic_matrices.torch
         # iterate over all cameras
         for i in env_ids:
             # Get corresponding sensor prim
@@ -534,11 +588,11 @@ def _update_intrinsic_matrices(self, env_ids: Sequence[int]):
             c_x = width * 0.5
             c_y = height * 0.5
             # create intrinsic matrix for depth linear
-            self._data.intrinsic_matrices[i, 0, 0] = f_x
-            self._data.intrinsic_matrices[i, 0, 2] = c_x
-            self._data.intrinsic_matrices[i, 1, 1] = f_y
-            self._data.intrinsic_matrices[i, 1, 2] = c_y
-            self._data.intrinsic_matrices[i, 2, 2] = 1
+            intrinsic_view[i, 0, 0] = f_x
+            intrinsic_view[i, 0, 2] = c_x
+            intrinsic_view[i, 1, 1] = f_y
+            intrinsic_view[i, 1, 2] = c_y
+            intrinsic_view[i, 2, 2] = 1
 
     def _update_poses(self, env_ids: Sequence[int]):
         """Computes the pose of the camera in the world frame with ROS convention.
@@ -558,14 +612,19 @@ def _update_poses(self, env_ids: Sequence[int]):
             env_ids = torch.tensor(env_ids, dtype=torch.int32, device=self._device)
         indices = wp.from_torch(env_ids.to(dtype=torch.int32), dtype=wp.int32) if env_ids is not None else None
         pos_w, quat_w = self._view.get_world_poses(indices)
-        self._data.pos_w[env_ids] = pos_w.torch
-        self._data.quat_w_world[env_ids] = convert_camera_frame_orientation_convention(
+        # Mutate via the zero-copy torch view to avoid the ProxyArray.__setitem__
+        # deprecation warning (the view shares memory with the wp.array primary).
+        self._data.pos_w.torch[env_ids] = pos_w.torch
+        self._data.quat_w_world.torch[env_ids] = convert_camera_frame_orientation_convention(
             quat_w.torch, origin="opengl", target="world"
         )
         # notify renderer of updated poses (guarded in case called before initialization completes)
         if self._render_data is not None:
             self._renderer.update_camera(
-                self._render_data, self._data.pos_w, self._data.quat_w_world, self._data.intrinsic_matrices
+                self._render_data,
+                self._data.pos_w.warp,
+                self._data.quat_w_world.warp,
+                self._data.intrinsic_matrices.warp,
             )
 
     """
diff --git a/source/isaaclab/isaaclab/sensors/camera/camera_data.py b/source/isaaclab/isaaclab/sensors/camera/camera_data.py
index 6ec5484531b2..40595e40d1c5 100644
--- a/source/isaaclab/isaaclab/sensors/camera/camera_data.py
+++ b/source/isaaclab/isaaclab/sensors/camera/camera_data.py
@@ -9,29 +9,41 @@
 from typing import Any
 
 import torch
+import warp as wp
 
 # Re-exported as part of the public isaaclab.sensors.camera API
 from isaaclab.renderers.output_contract import RenderBufferKind, RenderBufferSpec
 from isaaclab.utils.math import convert_camera_frame_orientation_convention
+from isaaclab.utils.warp.proxy_array import ProxyArray
 
 __all__ = ["CameraData", "RenderBufferKind", "RenderBufferSpec"]
 
 
+def _wrap(buf: torch.Tensor) -> ProxyArray:
+    """Wrap a torch buffer as a :class:`ProxyArray` with a zero-copy ``wp.array`` view."""
+    return ProxyArray(wp.from_torch(buf))
+
+
 @dataclass
 class CameraData:
-    """Data container for the camera sensor."""
+    """Data container for the camera sensor.
+
+    All array-valued fields are :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`
+    wrappers around an underlying :class:`warp.array` (Warp-primary storage with a
+    cached zero-copy ``torch.Tensor`` view available via ``.torch``).
+    """
 
     ##
     # Frame state.
     ##
 
-    pos_w: torch.Tensor = None
-    """Position of the sensor origin in world frame, following ROS convention.
+    pos_w: ProxyArray = None
+    """Position [m] of the sensor origin in world frame, following ROS convention.
 
     Shape is (N, 3) where N is the number of sensors.
     """
 
-    quat_w_world: torch.Tensor = None
+    quat_w_world: ProxyArray = None
     """Quaternion orientation `(x, y, z, w)` of the sensor origin in world frame, following the world coordinate frame
 
     .. note::
@@ -47,13 +59,13 @@ class CameraData:
     image_shape: tuple[int, int] = None
     """A tuple containing (height, width) of the camera sensor."""
 
-    intrinsic_matrices: torch.Tensor = None
+    intrinsic_matrices: ProxyArray = None
     """The intrinsic matrices for the camera.
 
     Shape is (N, 3, 3) where N is the number of sensors.
     """
 
-    output: dict[str, torch.Tensor] = None
+    output: dict[str, ProxyArray] = None
     """The retrieved sensor data with sensor types as key.
 
     The format of the data is available in the `Replicator Documentation`_. For semantic-based data,
@@ -82,9 +94,11 @@ def allocate(
     ) -> CameraData:
         """Build a :class:`CameraData` with output buffers pre-allocated.
 
-        Allocates one ``(num_views, height, width, channels)`` tensor per kind
+        Allocates one ``(num_views, height, width, channels)`` buffer per kind
         in the intersection of ``data_types`` and ``supported_specs``, using
-        the channels and dtype from each :class:`RenderBufferSpec`.
+        the channels and dtype from each :class:`RenderBufferSpec`. Each buffer
+        is exposed as a :class:`ProxyArray` (``wp.array`` primary, zero-copy
+        ``torch.Tensor`` view via ``.torch``).
 
         Args:
             data_types: Requested output names (typically :attr:`CameraCfg.data_types`).
@@ -124,19 +138,28 @@ def allocate(
         if rgb_alias:
             requested.update({RenderBufferKind.RGB, RenderBufferKind.RGBA})
 
-        buffers: dict[str, torch.Tensor] = {}
+        # Allocate the underlying torch buffers first so we can preserve the
+        # rgb-as-view-of-rgba alias as a torch slice (shared memory). Each
+        # buffer is then wrapped as a ProxyArray over a wp.from_torch view —
+        # the wp.array and torch.Tensor share the same allocation.
+        torch_buffers: dict[str, torch.Tensor] = {}
         for name, spec in supported_specs.items():
             if name not in requested:
                 continue
             if rgb_alias and name == RenderBufferKind.RGB:
                 continue
-            buffers[str(name)] = torch.zeros(
+            torch_buffers[str(name)] = torch.zeros(
                 (num_views, height, width, spec.channels),
                 dtype=spec.dtype,
                 device=device,
             ).contiguous()
         if rgb_alias:
-            buffers[str(RenderBufferKind.RGB)] = buffers[str(RenderBufferKind.RGBA)][..., :3]
+            # Share storage with rgba: the slice is a non-contiguous torch view.
+            # ``wp.from_torch`` accepts strided views, so the resulting ProxyArray
+            # tracks the same underlying memory as the rgba buffer.
+            torch_buffers[str(RenderBufferKind.RGB)] = torch_buffers[str(RenderBufferKind.RGBA)][..., :3]
+
+        buffers: dict[str, ProxyArray] = {name: _wrap(buf) for name, buf in torch_buffers.items()}
 
         return cls(
             image_shape=(height, width),
@@ -149,7 +172,7 @@ def allocate(
     ##
 
     @property
-    def quat_w_ros(self) -> torch.Tensor:
+    def quat_w_ros(self) -> ProxyArray:
         """Quaternion orientation `(x, y, z, w)` of the sensor origin in the world frame, following ROS convention.
 
         .. note::
@@ -157,10 +180,11 @@ def quat_w_ros(self) -> torch.Tensor:
 
         Shape is (N, 4) where N is the number of sensors.
         """
-        return convert_camera_frame_orientation_convention(self.quat_w_world, origin="world", target="ros")
+        converted = convert_camera_frame_orientation_convention(self.quat_w_world.torch, origin="world", target="ros")
+        return _wrap(converted.contiguous())
 
     @property
-    def quat_w_opengl(self) -> torch.Tensor:
+    def quat_w_opengl(self) -> ProxyArray:
         """Quaternion orientation `(x, y, z, w)` of the sensor origin in the world frame, following
         Opengl / USD Camera convention.
 
@@ -169,4 +193,7 @@ def quat_w_opengl(self) -> torch.Tensor:
 
         Shape is (N, 4) where N is the number of sensors.
         """
-        return convert_camera_frame_orientation_convention(self.quat_w_world, origin="world", target="opengl")
+        converted = convert_camera_frame_orientation_convention(
+            self.quat_w_world.torch, origin="world", target="opengl"
+        )
+        return _wrap(converted.contiguous())
diff --git a/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera.py b/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera.py
index a14c16f3d0d9..a706385e6fb4 100644
--- a/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera.py
+++ b/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera.py
@@ -12,6 +12,7 @@
 
 import isaaclab.utils.math as math_utils
 from isaaclab.utils.warp import kernels as warp_kernels
+from isaaclab.utils.warp.proxy_array import ProxyArray
 
 from .kernels import (
     CAMERA_RAYCAST_MAX_DIST,
@@ -84,9 +85,11 @@ def _initialize_warp_meshes(self):
 
     def _create_buffers(self):
         super()._create_buffers()
-        self._data.image_mesh_ids = torch.zeros(
+        # image_mesh_ids primary buffer: torch.zeros + wp.from_torch share memory.
+        image_mesh_ids_torch = torch.zeros(
             self._num_envs, *self.image_shape, 1, device=self.device, dtype=torch.int16
-        )
+        ).contiguous()
+        self._data.image_mesh_ids = ProxyArray(wp.from_torch(image_mesh_ids_torch))
 
     def _initialize_rays_impl(self):
         # NOTE: This method intentionally does NOT call super()._initialize_rays_impl() through the MRO
@@ -99,7 +102,10 @@ def _initialize_rays_impl(self):
 
         # Camera-specific bookkeeping buffers
         self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long)
-        self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long)
+        # Frame count: wp.array primary, ProxyArray-published.
+        self._frame = ProxyArray(
+            wp.from_torch(torch.zeros(self._view.count, device=self._device, dtype=torch.long).contiguous())
+        )
 
         # Build camera output buffers (intrinsics, image data, etc.)
         self._create_buffers()
@@ -107,7 +113,7 @@ def _initialize_rays_impl(self):
 
         # Compute local ray starts/directions from the camera pattern (torch, init-time only)
         ray_starts_local, ray_directions_local = self.cfg.pattern_cfg.func(
-            self.cfg.pattern_cfg, self._data.intrinsic_matrices, self._device
+            self.cfg.pattern_cfg, self._data.intrinsic_matrices.torch, self._device
         )
         self.num_rays = ray_directions_local.shape[1]
 
@@ -124,9 +130,8 @@ def _initialize_rays_impl(self):
         self._offset_quat = quat_offset.repeat(self._view.count, 1)
         self._offset_pos = torch.tensor(list(self.cfg.offset.pos), device=self._device).repeat(self._view.count, 1)
 
-        # Camera pose buffers (torch, part of CameraData)
-        self._data.pos_w = torch.zeros(self._view.count, 3, device=self._device)
-        self._data.quat_w_world = torch.zeros(self._view.count, 4, device=self._device)
+        # Camera pose buffers (pos_w / quat_w_world) were already allocated as
+        # ProxyArrays in self._create_buffers() above; no re-init needed here.
         # Warp-backed camera orientation buffer for warp kernel calls;
         # updated from self._data.quat_w_world in _update_ray_infos.
         self._quat_w_wp = wp.zeros(self._view.count, dtype=wp.quatf, device=self._device)
@@ -183,9 +188,9 @@ def _update_ray_infos(self, env_mask: wp.array):
         pos_w, quat_w = math_utils.combine_frame_transforms(
             pos_w, quat_w, self._offset_pos[env_ids], self._offset_quat[env_ids]
         )
-        # Store camera pose in CameraData (torch tensors) and warp-backed orientation buffer
-        self._data.pos_w[env_ids] = pos_w
-        self._data.quat_w_world[env_ids] = quat_w
+        # Store camera pose in CameraData via the zero-copy torch view of the wp.array primary
+        self._data.pos_w.torch[env_ids] = pos_w
+        self._data.quat_w_world.torch[env_ids] = quat_w
         self._quat_w_wp_torch[env_ids] = quat_w
 
         # Rotate local ray starts and directions into world frame using full camera orientation
@@ -211,8 +216,8 @@ def _update_buffers_impl(self, env_mask: wp.array):
 
         self._update_ray_infos(env_mask)
 
-        # Increment frame count for updated environments
-        self._frame[env_ids] += 1
+        # Increment frame count for updated environments (ProxyArray; mutate via .torch)
+        self._frame.torch[env_ids] += 1
 
         self._update_mesh_transforms()
 
@@ -275,19 +280,23 @@ def _update_buffers_impl(self, env_mask: wp.array):
             # Apply depth clipping on the intermediate buffer (leaves _ray_distance_cam_w unmodified)
             self._apply_depth_clipping(env_mask, self._distance_to_image_plane_wp)
             d2ip_torch = wp.to_torch(self._distance_to_image_plane_wp)
-            self._data.output["distance_to_image_plane"][env_ids] = d2ip_torch[env_ids].view(-1, *self.image_shape, 1)
+            self._data.output["distance_to_image_plane"].torch[env_ids] = d2ip_torch[env_ids].view(
+                -1, *self.image_shape, 1
+            )
 
         if "distance_to_camera" in self.cfg.data_types:
             # d2ip (if requested) was computed before this block so _ray_distance_cam_w is still unclipped.
             self._apply_depth_clipping(env_mask, self._ray_distance_cam_w)
             ray_dist_torch = wp.to_torch(self._ray_distance_cam_w)
-            self._data.output["distance_to_camera"][env_ids] = ray_dist_torch[env_ids].view(-1, *self.image_shape, 1)
+            self._data.output["distance_to_camera"].torch[env_ids] = ray_dist_torch[env_ids].view(
+                -1, *self.image_shape, 1
+            )
 
         if return_normal:
             ray_normal_torch = wp.to_torch(self._ray_normal_w)
-            self._data.output["normals"][env_ids] = ray_normal_torch[env_ids].view(-1, *self.image_shape, 3)
+            self._data.output["normals"].torch[env_ids] = ray_normal_torch[env_ids].view(-1, *self.image_shape, 3)
 
         if self.cfg.update_mesh_ids:
-            self._data.image_mesh_ids[env_ids] = wp.to_torch(self._ray_mesh_id_w)[env_ids].view(
+            self._data.image_mesh_ids.torch[env_ids] = wp.to_torch(self._ray_mesh_id_w)[env_ids].view(
                 -1, *self.image_shape, 1
             )
diff --git a/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera_data.py b/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera_data.py
index 21338f0a0616..8c90e77b7ddf 100644
--- a/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera_data.py
+++ b/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera_data.py
@@ -5,9 +5,8 @@
 
 """Data container for the multi-mesh ray-cast camera sensor."""
 
-import torch
-
 from isaaclab.sensors.camera import CameraData
+from isaaclab.utils.warp.proxy_array import ProxyArray
 
 
 class MultiMeshRayCasterCameraData(CameraData):
@@ -15,13 +14,13 @@ class MultiMeshRayCasterCameraData(CameraData):
 
     This class extends :class:`CameraData` with additional mesh-id information.
     It does not inherit from :class:`RayCasterData` because the camera variant
-    manages its own torch-based pose and hit buffers independently from the
+    manages its own pose and hit buffers independently from the
     warp-native :class:`RayCasterData`.
     """
 
-    image_mesh_ids: torch.Tensor = None
+    image_mesh_ids: ProxyArray = None
     """The mesh ids of the image pixels.
 
     Shape is (N, H, W, 1), where N is the number of sensors, H and W are the height and width of the image,
-    and 1 is the number of mesh ids per pixel.
+    and 1 is the number of mesh ids per pixel. Stored as a ``wp.array`` of dtype ``wp.int16``.
     """
diff --git a/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py b/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py
index 257b25698deb..56a8078c78b1 100644
--- a/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py
+++ b/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py
@@ -17,6 +17,7 @@
 import isaaclab.utils.math as math_utils
 from isaaclab.sensors.camera import CameraData
 from isaaclab.utils.warp.kernels import raycast_mesh_masked_kernel
+from isaaclab.utils.warp.proxy_array import ProxyArray
 
 from .kernels import (
     ALIGNMENT_BASE,
@@ -36,6 +37,19 @@
 logger = logging.getLogger(__name__)
 
 
+def _to_device_tensor(value: wp.array | ProxyArray | torch.Tensor, device: str | torch.device) -> torch.Tensor:
+    """Normalize an array-like input to a ``torch.Tensor`` on ``device``.
+
+    Accepts :class:`warp.array`, :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`,
+    and :class:`torch.Tensor`. Warp inputs are unwrapped via ``wp.to_torch`` (zero-copy).
+    """
+    if isinstance(value, ProxyArray):
+        value = value.torch
+    elif isinstance(value, wp.array):
+        value = wp.to_torch(value)
+    return value.to(device=device)
+
+
 class RayCasterCamera(RayCaster):
     """A ray-casting camera sensor.
 
@@ -123,8 +137,13 @@ def image_shape(self) -> tuple[int, int]:
         return (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width)
 
     @property
-    def frame(self) -> torch.tensor:
-        """Frame number when the measurement took place."""
+    def frame(self) -> ProxyArray:
+        """Frame number when the measurement took place.
+
+        Shape is (N,), dtype ``wp.int64``. Use ``.torch`` for a zero-copy
+        :class:`torch.Tensor` view or ``.warp`` for the underlying
+        :class:`warp.array`.
+        """
         return self._frame
 
     """
@@ -132,24 +151,32 @@ def frame(self) -> torch.tensor:
     """
 
     def set_intrinsic_matrices(
-        self, matrices: torch.Tensor, focal_length: float = 1.0, env_ids: Sequence[int] | None = None
+        self,
+        matrices: wp.array | ProxyArray | torch.Tensor,
+        focal_length: float = 1.0,
+        env_ids: Sequence[int] | None = None,
     ):
         """Set the intrinsic matrix of the camera.
 
         Args:
-            matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3).
+            matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3). A
+                :class:`warp.array` is the canonical input type; :class:`ProxyArray` and
+                :class:`torch.Tensor` are also accepted.
             focal_length: Focal length to use when computing aperture values (in cm). Defaults to 1.0.
             env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
         """
         # resolve env_ids
         if env_ids is None:
             env_ids = slice(None)
-        # save new intrinsic matrices and focal length
-        self._data.intrinsic_matrices[env_ids] = matrices.to(self._device)
+        # normalize matrices input to torch on sensor device
+        matrices = _to_device_tensor(matrices, self._device)
+        # save new intrinsic matrices and focal length (write via the zero-copy torch view)
+        intrinsic_view = self._data.intrinsic_matrices.torch
+        intrinsic_view[env_ids] = matrices
         self._focal_length = focal_length
         # recompute ray directions
         self.ray_starts[env_ids], self.ray_directions[env_ids] = self.cfg.pattern_cfg.func(
-            self.cfg.pattern_cfg, self._data.intrinsic_matrices[env_ids], self._device
+            self.cfg.pattern_cfg, intrinsic_view[env_ids], self._device
         )
         # Refresh warp views of local ray buffers; .contiguous() may produce a copy so we store
         # the contiguous tensors explicitly to prevent GC while the warp views are alive.
@@ -176,15 +203,15 @@ def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None
         pos_w, quat_w = math_utils.combine_frame_transforms(
             pos_w.torch.clone(), quat_w.torch.clone(), self._offset_pos[env_ids], self._offset_quat[env_ids]
         )
-        self._data.pos_w[env_ids] = pos_w
-        self._data.quat_w_world[env_ids] = quat_w
-        # Reset the frame count
-        self._frame[env_ids] = 0
+        self._data.pos_w.torch[env_ids] = pos_w
+        self._data.quat_w_world.torch[env_ids] = quat_w
+        # Reset the frame count (ProxyArray; mutate via the zero-copy torch view)
+        self._frame.torch[env_ids] = 0
 
     def set_world_poses(
         self,
-        positions: torch.Tensor | None = None,
-        orientations: torch.Tensor | None = None,
+        positions: wp.array | ProxyArray | torch.Tensor | None = None,
+        orientations: wp.array | ProxyArray | torch.Tensor | None = None,
         env_ids: Sequence[int] | None = None,
         convention: Literal["opengl", "ros", "world"] = "ros",
     ):
@@ -201,10 +228,13 @@ def set_world_poses(
         on the conventions.
 
         Args:
-            positions: The cartesian coordinates (in meters). Shape is (N, 3).
-                Defaults to None, in which case the camera position in not changed.
+            positions: The cartesian coordinates [m]. Shape is (N, 3). A :class:`warp.array`
+                is the canonical input type; :class:`ProxyArray` and :class:`torch.Tensor`
+                are also accepted. Defaults to None, in which case the camera position is
+                not changed.
             orientations: The quaternion orientation in (x, y, z, w). Shape is (N, 4).
-                Defaults to None, in which case the camera orientation in not changed.
+                Same accepted types as ``positions``. Defaults to None, in which case
+                the camera orientation is not changed.
             env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
             convention: The convention in which the poses are fed. Defaults to "ros".
 
@@ -221,10 +251,12 @@ def set_world_poses(
         pos_w_torch = pos_w.torch
         quat_w_torch = quat_w.torch
         if positions is not None:
+            positions = _to_device_tensor(positions, self._device)
             # transform to camera frame
             pos_offset_world_frame = positions - pos_w_torch
             self._offset_pos[env_ids] = math_utils.quat_apply(math_utils.quat_inv(quat_w_torch), pos_offset_world_frame)
         if orientations is not None:
+            orientations = _to_device_tensor(orientations, self._device)
             # convert rotation matrix from input convention to world
             quat_w_set = math_utils.convert_camera_frame_orientation_convention(
                 orientations, origin=convention, target="world"
@@ -236,23 +268,31 @@ def set_world_poses(
         pos_w_out, quat_w_out = math_utils.combine_frame_transforms(
             pos_w2.torch.clone(), quat_w2.torch.clone(), self._offset_pos[env_ids], self._offset_quat[env_ids]
         )
-        self._data.pos_w[env_ids] = pos_w_out
-        self._data.quat_w_world[env_ids] = quat_w_out
+        self._data.pos_w.torch[env_ids] = pos_w_out
+        self._data.quat_w_world.torch[env_ids] = quat_w_out
 
     def set_world_poses_from_view(
-        self, eyes: torch.Tensor, targets: torch.Tensor, env_ids: Sequence[int] | None = None
+        self,
+        eyes: wp.array | ProxyArray | torch.Tensor,
+        targets: wp.array | ProxyArray | torch.Tensor,
+        env_ids: Sequence[int] | None = None,
     ):
         """Set the poses of the camera from the eye position and look-at target position.
 
         Args:
-            eyes: The positions of the camera's eye. Shape is (N, 3).
-            targets: The target locations to look at. Shape is (N, 3).
+            eyes: The positions of the camera's eye. Shape is (N, 3). A :class:`warp.array`
+                is the canonical input type; :class:`ProxyArray` and :class:`torch.Tensor`
+                are also accepted.
+            targets: The target locations to look at. Shape is (N, 3). Same accepted types
+                as ``eyes``.
             env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
 
         Raises:
             RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
             NotImplementedError: If the stage up-axis is not "Y" or "Z".
         """
+        eyes = _to_device_tensor(eyes, self._device)
+        targets = _to_device_tensor(targets, self._device)
         # get up axis of current stage
         up_axis = UsdGeom.GetStageUpAxis(self.stage)
         # camera position and rotation in opengl convention
@@ -268,15 +308,17 @@ def set_world_poses_from_view(
     def _initialize_rays_impl(self):
         # Create all indices buffer
         self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long)
-        # Create frame count buffer
-        self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long)
+        # Create frame count buffer (wp.array primary, ProxyArray-published).
+        self._frame = ProxyArray(
+            wp.from_torch(torch.zeros(self._view.count, device=self._device, dtype=torch.long).contiguous())
+        )
         # create buffers
         self._create_buffers()
         # compute intrinsic matrices
         self._compute_intrinsic_matrices()
-        # compute ray starts and directions
+        # compute ray starts and directions (pattern_cfg.func expects a torch.Tensor)
         self.ray_starts, self.ray_directions = self.cfg.pattern_cfg.func(
-            self.cfg.pattern_cfg, self._data.intrinsic_matrices, self._device
+            self.cfg.pattern_cfg, self._data.intrinsic_matrices.torch, self._device
         )
         self.num_rays = self.ray_directions.shape[1]
 
@@ -349,8 +391,8 @@ def _update_buffers_impl(self, env_mask: wp.array):
         env_ids = wp.to_torch(env_mask).nonzero(as_tuple=False).squeeze(-1)
         if len(env_ids) == 0:
             return
-        # increment frame count
-        self._frame[env_ids] += 1
+        # increment frame count (ProxyArray; mutate via the zero-copy torch view)
+        self._frame.torch[env_ids] += 1
 
         # Update world-frame ray starts/directions and camera pose via warp kernel.
         # Camera always uses ALIGNMENT_BASE (full orientation) and zero drift.
@@ -378,9 +420,9 @@ def _update_buffers_impl(self, env_mask: wp.array):
             device=self._device,
         )
 
-        # Write camera pose to CameraData (torch tensors)
-        self._data.pos_w[env_ids] = self._pos_w_torch[env_ids]
-        self._data.quat_w_world[env_ids] = self._quat_w_torch[env_ids]
+        # Write camera pose to CameraData via the zero-copy torch view of the wp.array primary
+        self._data.pos_w.torch[env_ids] = self._pos_w_torch[env_ids]
+        self._data.quat_w_world.torch[env_ids] = self._quat_w_torch[env_ids]
 
         # Fill ray hit positions with inf before raycasting
         wp.launch(
@@ -447,19 +489,21 @@ def _update_buffers_impl(self, env_mask: wp.array):
             )
             # Apply depth clipping on the intermediate buffer (leaves _ray_distance unmodified)
             self._apply_depth_clipping(env_mask, self._distance_to_image_plane_wp)
-            self._data.output["distance_to_image_plane"][env_ids] = self._distance_to_image_plane_torch[env_ids].view(
-                -1, *self.image_shape, 1
-            )
+            self._data.output["distance_to_image_plane"].torch[env_ids] = self._distance_to_image_plane_torch[
+                env_ids
+            ].view(-1, *self.image_shape, 1)
 
         if "distance_to_camera" in self.cfg.data_types:
             # d2ip (if requested) was computed before this block so _ray_distance is still unclipped.
             self._apply_depth_clipping(env_mask, self._ray_distance)
-            self._data.output["distance_to_camera"][env_ids] = self._ray_distance_torch[env_ids].view(
+            self._data.output["distance_to_camera"].torch[env_ids] = self._ray_distance_torch[env_ids].view(
                 -1, *self.image_shape, 1
             )
 
         if "normals" in self.cfg.data_types:
-            self._data.output["normals"][env_ids] = self._ray_normal_w_torch[env_ids].view(-1, *self.image_shape, 3)
+            self._data.output["normals"].torch[env_ids] = self._ray_normal_w_torch[env_ids].view(
+                -1, *self.image_shape, 3
+            )
 
     def _debug_vis_callback(self, event):
         # in case it crashes be safe
@@ -525,17 +569,28 @@ def _check_supported_data_types(self, cfg: RayCasterCameraCfg):
             )
 
     def _create_buffers(self):
-        """Create buffers for storing data."""
+        """Create buffers for storing data.
+
+        Each :class:`CameraData` field stores a ``wp.array`` primary buffer
+        (allocated via ``wp.from_torch`` over a contiguous ``torch.zeros`` tensor)
+        and is exposed as a :class:`ProxyArray`. The torch view is the canonical
+        write target for in-place updates from python code.
+        """
         # prepare drift (kept as torch tensors so subclasses may use torch indexing)
         self.drift = torch.zeros(self._view.count, 3, device=self.device)
         self.ray_cast_drift = torch.zeros(self._view.count, 3, device=self.device)
         # create the data object
         # -- pose of the cameras
-        self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device)
-        self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device)
+        self._data.pos_w = ProxyArray(
+            wp.from_torch(torch.zeros((self._view.count, 3), device=self._device).contiguous())
+        )
+        self._data.quat_w_world = ProxyArray(
+            wp.from_torch(torch.zeros((self._view.count, 4), device=self._device).contiguous())
+        )
         # -- intrinsic matrix
-        self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device)
-        self._data.intrinsic_matrices[:, 2, 2] = 1.0
+        intrinsic_torch = torch.zeros((self._view.count, 3, 3), device=self._device).contiguous()
+        intrinsic_torch[:, 2, 2] = 1.0
+        self._data.intrinsic_matrices = ProxyArray(wp.from_torch(intrinsic_torch))
         self._data.image_shape = self.image_shape
         # -- output data
         # create the buffers to store the annotator data.
@@ -548,8 +603,9 @@ def _create_buffers(self):
                 shape = (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width, 3)
             else:
                 raise ValueError(f"Received unknown data type: {name}. Please check the configuration.")
-            # allocate tensor to store the data
-            self._data.output[name] = torch.zeros((self._view.count, *shape), device=self._device)
+            # allocate buffer (torch.zeros + wp.from_torch shares memory) and wrap as ProxyArray
+            buf = torch.zeros((self._view.count, *shape), device=self._device).contiguous()
+            self._data.output[name] = ProxyArray(wp.from_torch(buf))
 
     def _compute_intrinsic_matrices(self):
         """Computes the intrinsic matrices for the camera based on the config provided."""
@@ -566,11 +622,12 @@ def _compute_intrinsic_matrices(self):
         f_y = pattern_cfg.height * pattern_cfg.focal_length / pattern_cfg.vertical_aperture
         c_x = pattern_cfg.horizontal_aperture_offset * f_x + pattern_cfg.width / 2
         c_y = pattern_cfg.vertical_aperture_offset * f_y + pattern_cfg.height / 2
-        # allocate the intrinsic matrices
-        self._data.intrinsic_matrices[:, 0, 0] = f_x
-        self._data.intrinsic_matrices[:, 0, 2] = c_x
-        self._data.intrinsic_matrices[:, 1, 1] = f_y
-        self._data.intrinsic_matrices[:, 1, 2] = c_y
+        # write into the zero-copy torch view of the wp.array primary buffer
+        intrinsic_view = self._data.intrinsic_matrices.torch
+        intrinsic_view[:, 0, 0] = f_x
+        intrinsic_view[:, 0, 2] = c_x
+        intrinsic_view[:, 1, 1] = f_y
+        intrinsic_view[:, 1, 2] = c_y
 
         # save focal length
         self._focal_length = pattern_cfg.focal_length
diff --git a/source/isaaclab/test/renderers/test_camera_output_contract.py b/source/isaaclab/test/renderers/test_camera_output_contract.py
index 2d6087d29708..208a814a796a 100644
--- a/source/isaaclab/test/renderers/test_camera_output_contract.py
+++ b/source/isaaclab/test/renderers/test_camera_output_contract.py
@@ -158,11 +158,13 @@ def test_camera_data_allocates_supported_subset_and_aliases_rgb():
     )
 
     assert set(data.output.keys()) == {"rgba", "rgb", "depth"}
-    assert data.output["rgba"].shape == (2, 8, 16, 4)
-    assert data.output["rgba"].dtype == torch.uint8
-    assert data.output["depth"].shape == (2, 8, 16, 1)
-    assert data.output["depth"].dtype == torch.float32
-    assert data.output["rgb"].data_ptr() == data.output["rgba"].data_ptr()
+    # ProxyArray exposes torch dtype/shape via .torch and the underlying wp.array via .warp.
+    assert tuple(data.output["rgba"].shape) == (2, 8, 16, 4)
+    assert data.output["rgba"].torch.dtype == torch.uint8
+    assert tuple(data.output["depth"].shape) == (2, 8, 16, 1)
+    assert data.output["depth"].torch.dtype == torch.float32
+    # rgb is a non-contiguous torch view of rgba; both ProxyArrays share the same allocation.
+    assert data.output["rgb"].torch.data_ptr() == data.output["rgba"].torch.data_ptr()
     assert data.image_shape == (8, 16)
     assert data.info == {"rgba": None, "rgb": None, "depth": None}
 
@@ -206,10 +208,31 @@ def test_camera_data_segmentation_dtype_follows_supported_spec():
         data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=colorized_specs
     )
 
-    assert raw.output["instance_segmentation_fast"].dtype == torch.int32
-    assert raw.output["instance_segmentation_fast"].shape == (1, 4, 4, 1)
-    assert colorized.output["instance_segmentation_fast"].dtype == torch.uint8
-    assert colorized.output["instance_segmentation_fast"].shape == (1, 4, 4, 4)
+    assert raw.output["instance_segmentation_fast"].torch.dtype == torch.int32
+    assert tuple(raw.output["instance_segmentation_fast"].shape) == (1, 4, 4, 1)
+    assert colorized.output["instance_segmentation_fast"].torch.dtype == torch.uint8
+    assert tuple(colorized.output["instance_segmentation_fast"].shape) == (1, 4, 4, 4)
+
+
+def test_camera_data_output_proxyarray_zero_copy():
+    """Mutations through .torch are visible through .warp on every output buffer."""
+    cfg = _make_camera_cfg(["rgb", "rgba", "depth"])
+    specs = {
+        RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8),
+        RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8),
+        RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32),
+    }
+    data = CameraData.allocate(
+        data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=specs
+    )
+
+    # Write through the torch view; ensure the warp array sees the same memory.
+    rgba_proxy = data.output["rgba"]
+    rgba_proxy.torch[0, 0, 0, 0] = 7
+    assert rgba_proxy.warp.numpy()[0, 0, 0, 0] == 7
+
+    # The rgb proxy is a torch view of the rgba memory, so the same write is visible.
+    assert data.output["rgb"].torch[0, 0, 0, 0].item() == 7
 
 
 def test_camera_data_allocate_raises_on_unknown_name():
diff --git a/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py b/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py
index 720a85223aa6..ab8bb659ae0f 100644
--- a/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py
+++ b/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py
@@ -224,10 +224,11 @@ def get_initial_render(self) -> dict | None:
         if initial_render is None:
             raise RuntimeError("Initial render is None")
 
-        # Store the initial nominal tactile data
+        # Store the initial nominal tactile data; output values are ProxyArray —
+        # snapshot the torch view to keep the existing torch.Tensor downstream contract.
         self._nominal_tactile = dict()
         for key, value in initial_render.items():
-            self._nominal_tactile[key] = value.clone()
+            self._nominal_tactile[key] = value.torch.clone()
 
         return self._nominal_tactile
 
@@ -583,7 +584,8 @@ def _update_camera_tactile(self, env_ids: Sequence[int] | slice):
             depth_key = "depth"
 
         if depth_key:
-            self._data.tactile_depth_image[env_ids] = camera_data.output[depth_key][env_ids].clone()
+            # camera_data.output[depth_key] is ProxyArray; index its torch view explicitly.
+            self._data.tactile_depth_image[env_ids] = camera_data.output[depth_key].torch[env_ids].clone()
             diff = self._nominal_tactile[depth_key][env_ids] - self._data.tactile_depth_image[env_ids]
             self._data.tactile_rgb_image[env_ids] = self._tactile_rgb_render.render(diff.squeeze(-1))
 
diff --git a/source/isaaclab_newton/changelog.d/camera-data-proxyarray.skip b/source/isaaclab_newton/changelog.d/camera-data-proxyarray.skip
new file mode 100644
index 000000000000..7bb765d75772
--- /dev/null
+++ b/source/isaaclab_newton/changelog.d/camera-data-proxyarray.skip
@@ -0,0 +1,2 @@
+Internal-only update: NewtonWarpRenderer signatures realigned with the BaseRenderer
+wp.array contract. No public API changes beyond what the isaaclab fragment covers.
diff --git a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py
index a02d820f2951..d6e314ed2d46 100644
--- a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py
+++ b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py
@@ -52,18 +52,18 @@ def __init__(self, newton_sensor: newton.sensors.SensorTiledCamera, spec: Camera
         self.width = getattr(spec.cfg, "width", 100)
         self.height = getattr(spec.cfg, "height", 100)
 
-    def set_outputs(self, output_data: dict[str, torch.Tensor]):
-        for output_name, tensor_data in output_data.items():
+    def set_outputs(self, output_data: dict[str, wp.array]):
+        for output_name, wp_data in output_data.items():
             if output_name == RenderBufferKind.RGBA:
-                self.outputs.color_image = self._from_torch(tensor_data, dtype=wp.uint32)
+                self.outputs.color_image = self._typed_view(wp_data, dtype=wp.uint32)
             elif output_name == RenderBufferKind.ALBEDO:
-                self.outputs.albedo_image = self._from_torch(tensor_data, dtype=wp.uint32)
+                self.outputs.albedo_image = self._typed_view(wp_data, dtype=wp.uint32)
             elif output_name == RenderBufferKind.DEPTH:
-                self.outputs.depth_image = self._from_torch(tensor_data, dtype=wp.float32)
+                self.outputs.depth_image = self._typed_view(wp_data, dtype=wp.float32)
             elif output_name == RenderBufferKind.NORMALS:
-                self.outputs.normals_image = self._from_torch(tensor_data, dtype=wp.vec3f)
+                self.outputs.normals_image = self._typed_view(wp_data, dtype=wp.vec3f)
             elif output_name == RenderBufferKind.INSTANCE_SEGMENTATION_FAST:
-                self.outputs.instance_segmentation_image = self._from_torch(tensor_data, dtype=wp.uint32)
+                self.outputs.instance_segmentation_image = self._typed_view(wp_data, dtype=wp.uint32)
             elif output_name == RenderBufferKind.RGB:
                 pass
             else:
@@ -82,9 +82,10 @@ def get_output(self, output_name: str) -> wp.array:
             return self.outputs.instance_segmentation_image
         return None
 
-    def update(self, positions: torch.Tensor, orientations: torch.Tensor, intrinsics: torch.Tensor):
+    def update(self, positions: wp.array, orientations: wp.array, intrinsics: wp.array):
+        # The convention helper expects a torch tensor; use the zero-copy view of the wp.array.
         converted_orientations = convert_camera_frame_orientation_convention(
-            orientations, origin="world", target="opengl"
+            wp.to_torch(orientations), origin="world", target="opengl"
         )
 
         self.camera_transforms = wp.empty(
@@ -98,29 +99,26 @@ def update(self, positions: torch.Tensor, orientations: torch.Tensor, intrinsics
         )
 
         if self.camera_rays is None:
-            first_focal_length = intrinsics[:, 1, 1][0:1]
+            intrinsics_torch = wp.to_torch(intrinsics)
+            first_focal_length = intrinsics_torch[:, 1, 1][0:1]
             fov_radians_all = 2.0 * torch.atan(self.height / (2.0 * first_focal_length))
 
             self.camera_rays = self.newton_sensor.utils.compute_pinhole_camera_rays(
                 self.width, self.height, wp.from_torch(fov_radians_all, dtype=wp.float32)
             )
 
-    def _from_torch(self, tensor: torch.Tensor, dtype) -> wp.array:
-        proxy_array = wp.from_torch(tensor)
-        if tensor.is_contiguous():
-            return wp.array(
-                ptr=proxy_array.ptr,
-                dtype=dtype,
-                shape=(self.newton_sensor.model.world_count, self.num_cameras, self.height, self.width),
-                device=proxy_array.device,
-                copy=False,
-            )
+    def _typed_view(self, wp_data: wp.array, dtype) -> wp.array:
+        """Reinterpret the output buffer as ``(world_count, num_cameras, height, width)`` of ``dtype``.
 
-        logger.warning("NewtonWarpRenderer - torch output array is non-contiguous")
-        return wp.zeros(
-            (self.newton_sensor.model.world_count, self.num_cameras, self.height, self.width),
+        The Newton sensor writes per-pixel typed records; the camera buffer is allocated as a
+        4-D scalar array of the corresponding torch dtype. Both views share the same memory.
+        """
+        return wp.array(
+            ptr=wp_data.ptr,
             dtype=dtype,
-            device=proxy_array.device,
+            shape=(self.newton_sensor.model.world_count, self.num_cameras, self.height, self.width),
+            device=wp_data.device,
+            copy=False,
         )
 
     @wp.kernel
@@ -202,7 +200,7 @@ def create_render_data(self, spec: CameraRenderSpec) -> RenderData:
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.create_render_data`."""
         return RenderData(self.newton_sensor, spec)
 
-    def set_outputs(self, render_data: RenderData, output_data: dict[str, torch.Tensor]):
+    def set_outputs(self, render_data: RenderData, output_data: dict[str, wp.array]):
         """Store output buffers. See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs`."""
         render_data.set_outputs(output_data)
 
@@ -211,9 +209,7 @@ def update_transforms(self):
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_transforms`."""
         SimulationContext.instance().update_scene_data_provider(True)
 
-    def update_camera(
-        self, render_data: RenderData, positions: torch.Tensor, orientations: torch.Tensor, intrinsics: torch.Tensor
-    ):
+    def update_camera(self, render_data: RenderData, positions: wp.array, orientations: wp.array, intrinsics: wp.array):
         """Update camera poses and intrinsics.
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_camera`."""
         render_data.update(positions, orientations, intrinsics)
@@ -241,9 +237,9 @@ def read_output(self, render_data: RenderData, camera_data: CameraData) -> None:
                 continue
             image_data = render_data.get_output(output_name)
             if image_data is not None:
-                output_data = camera_data.output[output_name]
-                if image_data.ptr != output_data.data_ptr():
-                    wp.copy(wp.from_torch(output_data), image_data)
+                output_proxy = camera_data.output[output_name]
+                if image_data.ptr != output_proxy.warp.ptr:
+                    wp.copy(output_proxy.warp, image_data)
 
     def cleanup(self, render_data: RenderData | None):
         """Release resources. No-op for Newton Warp.
diff --git a/source/isaaclab_ov/changelog.d/camera-data-proxyarray.skip b/source/isaaclab_ov/changelog.d/camera-data-proxyarray.skip
new file mode 100644
index 000000000000..e493955cdea2
--- /dev/null
+++ b/source/isaaclab_ov/changelog.d/camera-data-proxyarray.skip
@@ -0,0 +1,2 @@
+Internal-only update: OVRTXRenderer signatures realigned with the BaseRenderer
+wp.array contract. No public API changes beyond what the isaaclab fragment covers.
diff --git a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py
index 5d1782373d87..624b984c690d 100644
--- a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py
+++ b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py
@@ -374,35 +374,36 @@ def create_render_data(self, spec: CameraRenderSpec) -> OVRTXRenderData:
         torch.int32: wp.int32,
     }
 
-    def set_outputs(self, render_data: OVRTXRenderData, output_data: dict[str, torch.Tensor]) -> None:
-        """Wrap caller-owned torch output tensors as zero-copy warp arrays.
+    def set_outputs(self, render_data: OVRTXRenderData, output_data: dict[str, wp.array]) -> None:
+        """Wrap caller-owned ``wp.array`` output buffers with the strict per-kind dtype OVRTX expects.
 
         Aliased views over a contiguous sibling (e.g. ``rgb`` over ``rgba``) are
-        skipped; any other non-contiguous tensor raises ``ValueError``.
+        skipped; any other non-contiguous array raises ``ValueError``.
 
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs`.
         """
         render_data.warp_buffers = {}
-        for name, tensor in output_data.items():
-            if not tensor.is_contiguous():
-                if tensor.data_ptr() in {t.data_ptr() for t in output_data.values() if t.is_contiguous()}:
+        contiguous_ptrs = {arr.ptr for arr in output_data.values() if arr.is_contiguous}
+        for name, arr in output_data.items():
+            if not arr.is_contiguous:
+                if arr.ptr in contiguous_ptrs:
                     continue
                 raise ValueError(
                     f"OVRTXRenderer.set_outputs: output '{name}' is non-contiguous and is not an"
-                    " alias of a contiguous output tensor; cannot wrap as a zero-copy warp array."
+                    " alias of a contiguous output buffer; cannot rewrap with the OVRTX dtype."
                 )
-            wp_dtype = self._TORCH_TO_WP_DTYPE.get(tensor.dtype)
+            torch_dtype = wp.to_torch(arr).dtype
+            wp_dtype = self._TORCH_TO_WP_DTYPE.get(torch_dtype)
             if wp_dtype is None:
                 raise ValueError(
-                    f"OVRTXRenderer.set_outputs: unsupported torch dtype {tensor.dtype} for output"
+                    f"OVRTXRenderer.set_outputs: unsupported dtype {torch_dtype} for output"
                     f" '{name}'. Add it to OVRTXRenderer._TORCH_TO_WP_DTYPE."
                 )
-            torch_array = wp.from_torch(tensor)
             render_data.warp_buffers[name] = wp.array(
-                ptr=torch_array.ptr,
+                ptr=arr.ptr,
                 dtype=wp_dtype,
-                shape=tuple(tensor.shape),
-                device=torch_array.device,
+                shape=tuple(arr.shape),
+                device=arr.device,
                 copy=False,
             )
 
@@ -436,14 +437,19 @@ def update_transforms(self) -> None:
     def update_camera(
         self,
         render_data: OVRTXRenderData,
-        positions: torch.Tensor,
-        orientations: torch.Tensor,
-        intrinsics: torch.Tensor,
+        positions: wp.array,
+        orientations: wp.array,
+        intrinsics: wp.array,
     ) -> None:
         """Update camera transforms in OVRTX binding."""
         num_envs = positions.shape[0]
-        camera_quats_opengl = convert_camera_frame_orientation_convention(orientations, origin="world", target="opengl")
-        camera_positions_wp = wp.from_torch(positions.contiguous(), dtype=wp.vec3)
+        # convention helper expects torch; use the zero-copy view of the wp.array
+        camera_quats_opengl = convert_camera_frame_orientation_convention(
+            wp.to_torch(orientations), origin="world", target="opengl"
+        )
+        camera_positions_wp = wp.array(
+            ptr=positions.ptr, dtype=wp.vec3, shape=(positions.shape[0],), device=positions.device, copy=False
+        )
         camera_orientations_wp = wp.from_torch(camera_quats_opengl.contiguous(), dtype=wp.quatf)
         camera_transforms = wp.zeros(num_envs, dtype=wp.mat44d, device=DEVICE)
         wp.launch(
diff --git a/source/isaaclab_physx/changelog.d/camera-data-proxyarray.minor.rst b/source/isaaclab_physx/changelog.d/camera-data-proxyarray.minor.rst
new file mode 100644
index 000000000000..31e7a21aa0a4
--- /dev/null
+++ b/source/isaaclab_physx/changelog.d/camera-data-proxyarray.minor.rst
@@ -0,0 +1,13 @@
+Changed
+^^^^^^^
+
+* **Breaking:** :class:`~isaaclab_physx.renderers.IsaacRtxRenderer` now writes
+  rendered tiles into ``dict[str, wp.array]`` output buffers (was
+  ``dict[str, torch.Tensor]``).
+  :meth:`~isaaclab_physx.renderers.IsaacRtxRenderer.set_outputs` and
+  :meth:`~isaaclab_physx.renderers.IsaacRtxRenderer.update_camera` accept
+  :class:`warp.array` arguments to match the new
+  :class:`~isaaclab.renderers.base_renderer.BaseRenderer` contract. The rgb
+  alias of rgba is preallocated as a torch view in
+  :meth:`~isaaclab.sensors.camera.CameraData.allocate` rather than rebound at
+  render time.
diff --git a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py
index 242ac3729d0b..10f90e53c958 100644
--- a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py
+++ b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py
@@ -72,7 +72,7 @@ class IsaacRtxRenderData:
 
     annotators: dict[str, Any]
     render_product_paths: list[str]
-    output_data: dict[str, torch.Tensor] | None = None
+    output_data: dict[str, wp.array] | None = None
     spec: CameraRenderSpec | None = None
     renderer_info: dict[str, Any] = field(default_factory=dict)
 
@@ -277,7 +277,7 @@ def _resolve_simple_shading_mode(self, spec: CameraRenderSpec) -> int | None:
             )
         return SIMPLE_SHADING_MODES[requested[0]]
 
-    def set_outputs(self, render_data: IsaacRtxRenderData, output_data: dict[str, torch.Tensor]):
+    def set_outputs(self, render_data: IsaacRtxRenderData, output_data: dict[str, wp.array]):
         """Store reference to output buffers for writing during render.
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs`."""
         render_data.output_data = output_data
@@ -290,9 +290,9 @@ def update_transforms(self) -> None:
     def update_camera(
         self,
         render_data: IsaacRtxRenderData,
-        positions: torch.Tensor,
-        orientations: torch.Tensor,
-        intrinsics: torch.Tensor,
+        positions: wp.array,
+        orientations: wp.array,
+        intrinsics: wp.array,
     ):
         """No-op for Replicator - uses USD camera prims directly.
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_camera`."""
@@ -364,35 +364,38 @@ def tiling_grid_shape():
             if data_type in SIMPLE_SHADING_MODES:
                 tiled_data_buffer = tiled_data_buffer[:, :, :3].contiguous()
 
+            output_wp = output_data[data_type]
             wp.launch(
                 kernel=reshape_tiled_image,
                 dim=(view_count, cfg.height, cfg.width),
                 inputs=[
                     tiled_data_buffer.flatten(),
-                    wp.from_torch(output_data[data_type]),
-                    *list(output_data[data_type].shape[1:]),
+                    output_wp,
+                    *list(output_wp.shape[1:]),
                     num_tiles_x,
                 ],
                 device=device,
             )
 
-            # alias rgb as first 3 channels of rgba
-            if data_type == "rgba" and "rgb" in cfg.data_types:
-                output_data["rgb"] = output_data["rgba"][..., :3]
+            # rgb is pre-allocated as a non-contiguous view of rgba in CameraData.allocate(),
+            # so the kernel write to rgba is automatically reflected in rgb. No rebind needed.
 
             # NOTE: The `distance_to_camera` annotator returns the distance to the camera optical center.
             #       However, the replicator depth clipping is applied w.r.t. to the image plane which may result
             #       in values larger than the clipping range in the output. We apply an additional clipping to
             #       ensure values are within the clipping range for all the annotators.
             if data_type == "distance_to_camera":
-                output_data[data_type][output_data[data_type] > cfg.spawn.clipping_range[1]] = torch.inf
+                # Use the zero-copy torch view of the wp.array primary for the in-place clip.
+                view = wp.to_torch(output_wp)
+                view[view > cfg.spawn.clipping_range[1]] = torch.inf
 
             # apply defined clipping behavior
             if (
                 data_type in ("distance_to_camera", "distance_to_image_plane", "depth")
                 and self.cfg.depth_clipping_behavior != "none"
             ):
-                output_data[data_type][torch.isinf(output_data[data_type])] = (
+                view = wp.to_torch(output_wp)
+                view[torch.isinf(view)] = (
                     0.0 if self.cfg.depth_clipping_behavior == "zero" else cfg.spawn.clipping_range[1]
                 )
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py
index 807c0cc09d91..e2934acc8370 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py
@@ -96,31 +96,33 @@ def _apply_action(self) -> None:
 
     def _get_observations(self) -> dict:
         data_type = self.cfg.tiled_camera.data_types[0]
+        # CameraData.output entries are ProxyArray; .torch yields a zero-copy torch view.
+        output_torch = self._tiled_camera.data.output[data_type].torch
         if "rgb" in self.cfg.tiled_camera.data_types:
-            camera_data = self._tiled_camera.data.output[data_type] / 255.0
+            camera_data = output_torch / 255.0
             # normalize the camera data for better training results
             mean_tensor = torch.mean(camera_data, dim=(1, 2), keepdim=True)
             camera_data -= mean_tensor
         elif "albedo" in self.cfg.tiled_camera.data_types:
-            camera_data = self._tiled_camera.data.output[data_type][..., :3] / 255.0
+            camera_data = output_torch[..., :3] / 255.0
             # normalize the camera data for better training results
             mean_tensor = torch.mean(camera_data, dim=(1, 2), keepdim=True)
             camera_data -= mean_tensor
         elif data_type in SIMPLE_SHADING_TYPES:
-            camera_data = self._tiled_camera.data.output[data_type] / 255.0
+            camera_data = output_torch / 255.0
             # normalize the camera data for better training results
             mean_tensor = torch.mean(camera_data, dim=(1, 2), keepdim=True)
             camera_data -= mean_tensor
         elif "depth" in self.cfg.tiled_camera.data_types:
-            camera_data = self._tiled_camera.data.output[data_type]
+            camera_data = output_torch
             camera_data[camera_data == float("inf")] = 0
         elif "semantic_segmentation" in self.cfg.tiled_camera.data_types:
-            camera_data = self._tiled_camera.data.output[data_type]
+            camera_data = output_torch
 
         observations = {"policy": camera_data.clone()}
 
         if self.cfg.write_image_to_file:
-            save_images_to_file(self._tiled_camera.data.output[data_type] / 255.0, f"cartpole_{data_type}.png")
+            save_images_to_file(output_torch / 255.0, f"cartpole_{data_type}.png")
 
         return observations
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/shadow_hand_vision_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
index 7da4db48e853..3e73839906db 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/shadow_hand_vision_env.py
@@ -69,8 +69,11 @@ def _compute_image_observations(self):
         object_pose = torch.cat([self.object_pos, self.gt_keypoints.view(-1, 24)], dim=-1)
 
         # train CNN to regress on keypoint positions
+        # CameraData.output entries are ProxyArray; the feature extractor expects
+        # torch tensors, so unwrap each entry via .torch (zero-copy view).
+        camera_output_torch = {k: v.torch for k, v in self._tiled_camera.data.output.items()}
         pose_loss, embeddings = self.feature_extractor.step(
-            self._tiled_camera.data.output,
+            camera_output_torch,
             object_pose,
         )
 
diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py
index fe666a7ecffe..b7fad5087aaa 100644
--- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py
+++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py
@@ -215,7 +215,8 @@ def __init__(self, cfg, env: ManagerBasedRLEnv):
     def __call__(
         self, env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, normalize: bool = True
     ) -> torch.Tensor:  # obtain the input image
-        images = self.sensor.data.output[self.sensor_type]
+        # CameraData.output entries are ProxyArray; .torch is a zero-copy view.
+        images = self.sensor.data.output[self.sensor_type].torch
         torch.nan_to_num_(images, nan=1e6)
         if normalize:
             images = self.norm_fn(images)

From c5ba2a1b3ec71a9336a2d57d838284559708a143 Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Thu, 7 May 2026 21:07:41 +0000
Subject: [PATCH 2/3] Updates

---
 .../camera-data-proxyarray.minor.rst          |  13 +
 .../isaaclab/renderers/output_contract.py     |  12 +-
 .../isaaclab/sensors/camera/camera.py         | 454 +++++++++++-------
 .../isaaclab/sensors/camera/camera_data.py    |  77 +--
 .../isaaclab/sensors/camera/kernels.py        | 318 ++++++++++++
 .../sensors/camera/orientation_conventions.py | 240 +++++++++
 .../sensors/ray_caster/ray_caster_camera.py   |  19 +-
 .../isaaclab/isaaclab/utils/warp/kernels.py   |  26 +
 .../renderers/test_camera_output_contract.py  |  25 +-
 .../renderers/newton_warp_renderer.py         |  61 ++-
 .../isaaclab_ov/renderers/ovrtx_renderer.py   | 100 ++--
 .../renderers/isaac_rtx_renderer.py           |  52 +-
 12 files changed, 1069 insertions(+), 328 deletions(-)
 create mode 100644 source/isaaclab/isaaclab/sensors/camera/kernels.py
 create mode 100644 source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py

diff --git a/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst b/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst
index 080dff3cf95a..47a7cb797798 100644
--- a/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst
+++ b/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst
@@ -36,3 +36,16 @@ Changed
   ``wp.array | ProxyArray | torch.Tensor | numpy.ndarray`` inputs, with
   :class:`warp.array` documented as the canonical type. Existing
   ``torch.Tensor`` callers continue to work without changes.
+* **Breaking:** :attr:`~isaaclab.renderers.RenderBufferSpec.dtype` now stores a
+  Warp scalar dtype (e.g. ``warp.uint8``) rather than a ``torch.dtype``.
+  Renderer authors that build :class:`RenderBufferSpec` instances must update
+  ``RenderBufferSpec(channels, torch.uint8)`` to ``RenderBufferSpec(channels,
+  wp.uint8)`` (and similarly for ``torch.float32``, ``torch.int32``).
+* :class:`~isaaclab.sensors.camera.Camera`,
+  :class:`~isaaclab.sensors.camera.CameraData`, and the three first-party
+  renderers (:class:`IsaacRtxRenderer`, :class:`NewtonWarpRenderer`,
+  :class:`OVRTXRenderer`) no longer import or use ``torch`` internally: every
+  storage allocation goes through :func:`warp.zeros`, scatter / mask / clipping
+  operations are warp kernels (see
+  :mod:`~isaaclab.sensors.camera.kernels`), and quaternion convention
+  conversions are implemented by :func:`isaaclab.sensors.camera.orientation_conventions.convert_quat_array`.
diff --git a/source/isaaclab/isaaclab/renderers/output_contract.py b/source/isaaclab/isaaclab/renderers/output_contract.py
index bfa3fff41d8b..7b04ae167caf 100644
--- a/source/isaaclab/isaaclab/renderers/output_contract.py
+++ b/source/isaaclab/isaaclab/renderers/output_contract.py
@@ -14,10 +14,7 @@
 
 from dataclasses import dataclass
 from enum import StrEnum
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    import torch
+from typing import Any
 
 
 class RenderBufferKind(StrEnum):
@@ -48,7 +45,8 @@ class RenderBufferSpec:
     """Per-pixel layout (channels + dtype) for one render buffer kind."""
 
     channels: int
-    """Number of per-pixel channels (last dimension of the allocated tensor)."""
+    """Number of per-pixel channels (last dimension of the allocated buffer)."""
 
-    dtype: torch.dtype
-    """Torch dtype the renderer writes for this render buffer kind."""
+    dtype: Any
+    """Warp scalar dtype (e.g. ``warp.uint8``, ``warp.float32``, ``warp.int32``)
+    the renderer writes for this render buffer kind."""
diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py
index 2ba26c672c73..b63f44a044ce 100644
--- a/source/isaaclab/isaaclab/sensors/camera/camera.py
+++ b/source/isaaclab/isaaclab/sensors/camera/camera.py
@@ -7,10 +7,9 @@
 
 import logging
 from collections.abc import Sequence
-from typing import TYPE_CHECKING, Literal
+from typing import TYPE_CHECKING, Any, Literal
 
 import numpy as np
-import torch
 import warp as wp
 
 from pxr import UsdGeom
@@ -21,15 +20,12 @@
 from isaaclab.renderers import BaseRenderer, CameraRenderSpec
 from isaaclab.sim.views import FrameView
 from isaaclab.utils import to_camel_case
-from isaaclab.utils.math import (
-    convert_camera_frame_orientation_convention,
-    create_rotation_matrix_from_view,
-    quat_from_matrix,
-)
 from isaaclab.utils.warp.proxy_array import ProxyArray
 
 from ..sensor_base import SensorBase
+from . import kernels as camera_kernels
 from .camera_data import CameraData, RenderBufferKind
+from .orientation_conventions import _resolve_conversion_quat, convert_quat_array
 
 if TYPE_CHECKING:
     from .camera_cfg import CameraCfg
@@ -38,6 +34,36 @@
 logger = logging.getLogger(__name__)
 
 
+def _to_wp_array(value: Any, device: str, dtype: Any | None = None) -> wp.array:
+    """Normalize an array-like input to a :class:`warp.array` on ``device``.
+
+    Accepts :class:`warp.array`, :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`,
+    :class:`numpy.ndarray`, lists/tuples of numbers, and torch tensors (detected
+    structurally via :func:`warp.from_torch`'s interop — no torch import needed).
+
+    The ``dtype`` argument, if provided, controls the returned ``wp.array`` dtype.
+    """
+    # ProxyArray and wp.array are the canonical fast paths.
+    if isinstance(value, ProxyArray):
+        arr = value.warp
+        if dtype is not None and arr.dtype is not dtype:
+            arr = wp.array(ptr=arr.ptr, dtype=dtype, shape=arr.shape, device=arr.device, copy=False)
+        return arr
+    if isinstance(value, wp.array):
+        if dtype is not None and value.dtype is not dtype:
+            value = wp.array(ptr=value.ptr, dtype=dtype, shape=value.shape, device=value.device, copy=False)
+        return value
+    # Torch tensors are detected by their module and the presence of ``data_ptr``,
+    # so this module never imports torch itself. ``wp.from_torch`` lives in warp's
+    # interop layer.
+    if type(value).__module__.startswith("torch") and hasattr(value, "data_ptr"):
+        contiguous = value.contiguous() if hasattr(value, "is_contiguous") and not value.is_contiguous() else value
+        return wp.from_torch(contiguous, dtype=dtype) if dtype is not None else wp.from_torch(contiguous)
+    # Numpy / lists / tuples — copy onto the device.
+    np_value = np.asarray(value)
+    return wp.array(np_value, dtype=dtype, device=device)
+
+
 class Camera(SensorBase):
     r"""The camera sensor for acquiring visual data.
 
@@ -118,12 +144,22 @@ def __init__(self, cfg: CameraCfg):
         if renderer_type == "isaac_rtx":
             get_settings_manager().set_bool("/isaaclab/render/rtx_sensors", True)
 
-        # Compute camera orientation (convention conversion) and spawn
-        rot = torch.tensor(self.cfg.offset.rot, dtype=torch.float32, device="cpu").unsqueeze(0)
-        rot_offset = convert_camera_frame_orientation_convention(
-            rot, origin=self.cfg.offset.convention, target="opengl"
+        # Compute the spawn orientation in OpenGL convention without involving torch:
+        # the cfg-supplied quaternion is constant per cfg, so we resolve the
+        # convention conversion as a single quaternion product on the CPU.
+        rot_xyzw = tuple(float(c) for c in self.cfg.offset.rot)
+        conv = _resolve_conversion_quat(self.cfg.offset.convention, "opengl")
+        sx, sy, sz, sw = rot_xyzw
+        cx, cy, cz, cw = conv
+        rot_offset = np.array(
+            [
+                sw * cx + sx * cw + sy * cz - sz * cy,
+                sw * cy - sx * cz + sy * cw + sz * cx,
+                sw * cz + sx * cy - sy * cx + sz * cw,
+                sw * cw - sx * cx - sy * cy - sz * cz,
+            ],
+            dtype=np.float32,
         )
-        rot_offset = rot_offset.squeeze(0).cpu().numpy()
         if self.cfg.spawn is not None and self.cfg.spawn.vertical_aperture is None:
             self.cfg.spawn.vertical_aperture = self.cfg.spawn.horizontal_aperture * self.cfg.height / self.cfg.width
         self._resolve_and_spawn("camera", translation=self.cfg.offset.pos, orientation=rot_offset)
@@ -191,7 +227,7 @@ def image_shape(self) -> tuple[int, int]:
 
     def set_intrinsic_matrices(
         self,
-        matrices: wp.array | ProxyArray | torch.Tensor | np.ndarray,
+        matrices: wp.array | ProxyArray | Any,
         focal_length: float | None = None,
         env_ids: Sequence[int] | None = None,
     ):
@@ -215,29 +251,26 @@ def set_intrinsic_matrices(
             matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3). A
                 :class:`warp.array` is the canonical input type;
                 :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`,
-                :class:`torch.Tensor`, and :class:`numpy.ndarray` are also accepted.
+                :class:`numpy.ndarray`, and :class:`torch.Tensor` are also accepted.
             focal_length: Perspective focal length (in cm) used to calculate pixel size. Defaults to None. If None,
                 focal_length will be calculated 1 / width.
             env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
         """
         # resolve env_ids
         if env_ids is None:
-            env_ids = self._ALL_INDICES
-        # normalize input: ProxyArray → wp.array → torch.Tensor → numpy
-        if isinstance(matrices, ProxyArray):
-            matrices = matrices.torch
-        elif isinstance(matrices, wp.array):
-            matrices = wp.to_torch(matrices)
-        if isinstance(matrices, torch.Tensor):
-            matrices = matrices.cpu().numpy()
-        else:
-            matrices = np.asarray(matrices, dtype=float)
+            env_ids = list(range(self._view.count))
+        # USD attribute writes are scalar host-side; pull a numpy view of the input.
+        matrices_wp = _to_wp_array(matrices, device=self._device)
+        matrices_np = matrices_wp.numpy()
         # iterate over env_ids
-        for i, intrinsic_matrix in zip(env_ids, matrices):
+        for i, intrinsic_matrix in zip(env_ids, matrices_np):
             height, width = self.image_shape
 
             params = sensor_utils.convert_camera_intrinsics_to_usd(
-                intrinsic_matrix=intrinsic_matrix.reshape(-1), height=height, width=width, focal_length=focal_length
+                intrinsic_matrix=np.asarray(intrinsic_matrix, dtype=float).reshape(-1),
+                height=height,
+                width=width,
+                focal_length=focal_length,
             )
 
             # change data for corresponding camera index
@@ -262,8 +295,8 @@ def set_intrinsic_matrices(
 
     def set_world_poses(
         self,
-        positions: wp.array | ProxyArray | torch.Tensor | np.ndarray | None = None,
-        orientations: wp.array | ProxyArray | torch.Tensor | np.ndarray | None = None,
+        positions: wp.array | ProxyArray | Any | None = None,
+        orientations: wp.array | ProxyArray | Any | None = None,
         env_ids: Sequence[int] | None = None,
         convention: Literal["opengl", "ros", "world"] = "ros",
     ):
@@ -281,8 +314,8 @@ def set_world_poses(
 
         Args:
             positions: The cartesian coordinates [m]. Shape is (N, 3). A :class:`warp.array`
-                is the canonical input type; :class:`ProxyArray`, :class:`torch.Tensor`,
-                and :class:`numpy.ndarray` are also accepted. Defaults to None, in which
+                is the canonical input type; :class:`ProxyArray`, :class:`numpy.ndarray`,
+                and :class:`torch.Tensor` are also accepted. Defaults to None, in which
                 case the camera position is not changed.
             orientations: The quaternion orientation in (x, y, z, w). Shape is (N, 4).
                 Same accepted types as ``positions``. Defaults to None, in which case
@@ -293,61 +326,63 @@ def set_world_poses(
         Raises:
             RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
         """
-        # resolve env_ids
-        if env_ids is None:
-            env_ids = self._ALL_INDICES
-        # normalize positions to a torch tensor on the sensor device
+        # Position buffer for FrameView: (N,) of wp.vec3.
+        pos_wp: wp.array | None = None
         if positions is not None:
-            positions = self._to_device_tensor(positions)
-        # convert rotation matrix from input convention to OpenGL
+            pos_wp = _to_wp_array(positions, device=self._device, dtype=wp.vec3)
+
+        # Orientation buffer for FrameView: (N,) of wp.quatf in the OpenGL convention.
+        ori_wp: wp.array | None = None
         if orientations is not None:
-            orientations = self._to_device_tensor(orientations)
-            orientations = convert_camera_frame_orientation_convention(orientations, origin=convention, target="opengl")
-        # convert torch tensors to warp arrays for the view
-        pos_wp = wp.from_torch(positions.contiguous()) if positions is not None else None
-        ori_wp = wp.from_torch(orientations.contiguous()) if orientations is not None else None
-        if env_ids is not None:
-            if not isinstance(env_ids, torch.Tensor):
-                env_ids = torch.tensor(env_ids, dtype=torch.int32, device=self._device)
-            idx_wp = wp.from_torch(env_ids.to(dtype=torch.int32), dtype=wp.int32)
-        else:
-            idx_wp = None
+            src_quat = _to_wp_array(orientations, device=self._device, dtype=wp.quatf)
+            ori_wp = (
+                convert_quat_array(src_quat, origin=convention, target="opengl") if convention != "opengl" else src_quat
+            )
+
+        idx_wp = self._resolve_env_indices(env_ids)
         self._view.set_world_poses(pos_wp, ori_wp, idx_wp)
 
     def set_world_poses_from_view(
         self,
-        eyes: wp.array | ProxyArray | torch.Tensor | np.ndarray,
-        targets: wp.array | ProxyArray | torch.Tensor | np.ndarray,
+        eyes: wp.array | ProxyArray | Any,
+        targets: wp.array | ProxyArray | Any,
         env_ids: Sequence[int] | None = None,
     ):
         """Set the poses of the camera from the eye position and look-at target position.
 
         Args:
-            eyes: The positions of the camera's eye. Shape is (N, 3). A :class:`warp.array`
-                is the canonical input type; :class:`ProxyArray`, :class:`torch.Tensor`,
-                and :class:`numpy.ndarray` are also accepted.
-            targets: The target locations to look at. Shape is (N, 3). Same accepted types
-                as ``eyes``.
+            eyes: The positions of the camera's eye [m]. Shape is (N, 3). A
+                :class:`warp.array` is the canonical input type;
+                :class:`ProxyArray`, :class:`numpy.ndarray`, and :class:`torch.Tensor`
+                are also accepted.
+            targets: The target locations to look at [m]. Shape is (N, 3). Same
+                accepted types as ``eyes``.
             env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices.
 
         Raises:
             RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
             NotImplementedError: If the stage up-axis is not "Y" or "Z".
         """
-        # resolve env_ids
-        if env_ids is None:
-            env_ids = self._ALL_INDICES
-        # normalize inputs to torch tensors on the sensor device
-        eyes = self._to_device_tensor(eyes)
-        targets = self._to_device_tensor(targets)
-        # get up axis of current stage
+        eyes_wp = _to_wp_array(eyes, device=self._device, dtype=wp.vec3)
+        targets_wp = _to_wp_array(targets, device=self._device, dtype=wp.vec3)
         up_axis = UsdGeom.GetStageUpAxis(self.stage)
-        # set camera poses using the view
-        orientations = quat_from_matrix(create_rotation_matrix_from_view(eyes, targets, up_axis, device=self._device))
-        if not isinstance(env_ids, torch.Tensor):
-            env_ids = torch.tensor(env_ids, dtype=torch.int32, device=self._device)
-        idx_wp = wp.from_torch(env_ids.to(dtype=torch.int32), dtype=wp.int32)
-        self._view.set_world_poses(wp.from_torch(eyes.contiguous()), wp.from_torch(orientations.contiguous()), idx_wp)
+        if up_axis == "Y":
+            up_vec = wp.vec3(0.0, 1.0, 0.0)
+        elif up_axis == "Z":
+            up_vec = wp.vec3(0.0, 0.0, 1.0)
+        else:
+            raise NotImplementedError(f"Stage up axis {up_axis!r} is not supported (expected 'Y' or 'Z').")
+        n = int(eyes_wp.shape[0])
+        ori_wp = wp.empty(n, dtype=wp.quatf, device=self._device)
+        wp.launch(
+            kernel=camera_kernels.look_at_quat_kernel,
+            dim=n,
+            inputs=[eyes_wp, targets_wp, up_vec],
+            outputs=[ori_wp],
+            device=self._device,
+        )
+        idx_wp = self._resolve_env_indices(env_ids)
+        self._view.set_world_poses(eyes_wp, ori_wp, idx_wp)
 
     """
     Operations
@@ -360,16 +395,19 @@ def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None
             )
         # reset the timestamps
         super().reset(env_ids, env_mask)
-        # resolve to indices for torch indexing
-        if env_ids is None and env_mask is not None:
-            env_ids = wp.to_torch(env_mask).nonzero(as_tuple=False).squeeze(-1)
-        elif env_ids is None:
-            env_ids = self._ALL_INDICES
-        # reset the data
+        # Resolve to a wp.int32 array of indices and a wp.bool mask covering them.
+        env_ids_wp, env_mask_local = self._resolve_env_ids_or_mask(env_ids, env_mask)
+        if env_ids_wp.shape[0] == 0:
+            return
         # note: this recomputation is useful if one performs events such as randomizations on the camera poses.
-        self._update_poses(env_ids)
-        # Reset the frame count (ProxyArray; mutate via the zero-copy torch view)
-        self._frame.torch[env_ids] = 0
+        self._update_poses_wp(env_ids_wp)
+        # Reset the frame count via a warp scatter kernel.
+        wp.launch(
+            camera_kernels.masked_set_int64_kernel,
+            dim=self._view.count,
+            inputs=[env_mask_local, wp.int64(0), self._frame.warp],
+            device=self._device,
+        )
 
     """
     Implementation.
@@ -410,12 +448,13 @@ def _initialize_impl(self):
                 f" the number of environments ({self._num_envs})."
             )
 
-        # Create all env_ids buffer
-        self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long)
-        # Create frame count buffer (wp.array primary, ProxyArray-published).
-        self._frame = ProxyArray(
-            wp.from_torch(torch.zeros(self._view.count, device=self._device, dtype=torch.long).contiguous())
-        )
+        # ALL_INDICES: wp.int32 array (idiomatic for indexing into other wp.arrays).
+        self._ALL_INDICES = wp.array(np.arange(self._view.count, dtype=np.int32), device=self._device)
+        # Frame count buffer (wp.array primary, ProxyArray-published).
+        self._frame = ProxyArray(wp.zeros(self._view.count, dtype=wp.int64, device=self._device))
+        # Pre-allocate scratch buffer for env_mask -> indices conversion.
+        self._env_indices_scratch = wp.empty(self._view.count, dtype=wp.int32, device=self._device)
+        self._env_indices_count = wp.zeros(1, dtype=wp.int32, device=self._device)
 
         # Convert all encapsulated prims to Camera
         for cam_prim in self._view.prims:
@@ -448,14 +487,20 @@ def _initialize_impl(self):
         self._create_buffers()
 
     def _update_buffers_impl(self, env_mask: wp.array):
-        env_ids = wp.to_torch(env_mask).nonzero(as_tuple=False).squeeze(-1)
-        if len(env_ids) == 0:
+        # Mask -> compact env_ids array via warp atomic scan (no torch involvement).
+        env_ids_wp, count = self._mask_to_env_ids(env_mask)
+        if count == 0:
             return
-        # Increment frame count (ProxyArray; mutate via the zero-copy torch view)
-        self._frame.torch[env_ids] += 1
+        # Increment frame count via masked warp kernel.
+        wp.launch(
+            camera_kernels.masked_increment_int64_kernel,
+            dim=self._view.count,
+            inputs=[env_mask, self._frame.warp],
+            device=self._device,
+        )
         # update latest camera pose if requested
         if self.cfg.update_latest_camera_pose:
-            self._update_poses(env_ids)
+            self._update_poses_wp(env_ids_wp)
 
         sim_ctx = sim_utils.SimulationContext.instance()
         renderer = self._renderer
@@ -475,27 +520,10 @@ def _update_buffers_impl(self, env_mask: wp.array):
     Private Helpers
     """
 
-    def _to_device_tensor(self, value: wp.array | ProxyArray | torch.Tensor | np.ndarray) -> torch.Tensor:
-        """Normalize an array-like input to a ``torch.Tensor`` on this sensor's device.
-
-        Accepts :class:`warp.array`, :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`,
-        :class:`torch.Tensor`, and :class:`numpy.ndarray`. Warp inputs are unwrapped via
-        ``wp.to_torch`` (zero-copy).
-        """
-        if isinstance(value, ProxyArray):
-            value = value.torch
-        elif isinstance(value, wp.array):
-            value = wp.to_torch(value)
-        if isinstance(value, np.ndarray):
-            return torch.from_numpy(value).to(device=self._device)
-        if not isinstance(value, torch.Tensor):
-            return torch.tensor(value, device=self._device)
-        return value.to(device=self._device)
-
     def _check_supported_data_types(self, cfg: CameraCfg):
         """Checks if the data types are supported by the ray-caster camera."""
         # check if there is any intersection in unsupported types
-        # reason: these use np structured data types which we can't yet convert to torch tensor
+        # reason: these use np structured data types which we can't yet convert to a warp buffer
         common_elements = set(cfg.data_types) & Camera.UNSUPPORTED_TYPES
         if common_elements:
             # provide alternative fast counterparts
@@ -507,13 +535,18 @@ def _check_supported_data_types(self, cfg: CameraCfg):
             raise ValueError(
                 f"Camera class does not support the following sensor types: {common_elements}."
                 "\n\tThis is because these sensor types output numpy structured data types which"
-                "can't be converted to torch tensors easily."
+                " can't be converted to a warp buffer easily."
                 "\n\tHint: If you need to work with these sensor types, we recommend using their fast counterparts."
                 f"\n\t\tFast counterparts: {fast_common_elements}"
             )
 
     def _create_buffers(self):
-        """Create buffers for storing data."""
+        """Allocate the camera-frame state buffers (pure warp).
+
+        Each :class:`CameraData` field stores a ``wp.array`` primary buffer and is
+        published as a :class:`ProxyArray`. Renderers receive the underlying
+        :class:`warp.array` references via :meth:`set_outputs`.
+        """
         specs = self._renderer.supported_output_types()
         # Split requested names into known/unsupported; warn once for any the renderer can't produce.
         known: list[str] = []
@@ -532,91 +565,104 @@ def _create_buffers(self):
                 type(self._renderer).__name__,
                 unsupported,
             )
+        device_str = str(self._device)
         self._data = CameraData.allocate(
             data_types=known,
             height=self.cfg.height,
             width=self.cfg.width,
             num_views=self._view.count,
-            device=self._device,
+            device=device_str,
             supported_specs=specs,
         )
         # Camera-frame state (pose / intrinsics) is owned by the camera, not
-        # the renderer: populate it on the freshly constructed ``CameraData``.
-        # Each field stores a wp.array primary buffer (allocated via wp.from_torch
-        # over a torch.zeros allocation) and is published as a ProxyArray.
-        self._data.intrinsic_matrices = ProxyArray(
-            wp.from_torch(torch.zeros((self._view.count, 3, 3), device=self._device).contiguous())
-        )
-        self._update_intrinsic_matrices(self._ALL_INDICES)
-        self._data.pos_w = ProxyArray(
-            wp.from_torch(torch.zeros((self._view.count, 3), device=self._device).contiguous())
-        )
-        self._data.quat_w_world = ProxyArray(
-            wp.from_torch(torch.zeros((self._view.count, 4), device=self._device).contiguous())
-        )
-        self._update_poses(self._ALL_INDICES)
+        # the renderer: populate it on the freshly constructed CameraData.
+        n = self._view.count
+        self._data.intrinsic_matrices = ProxyArray(wp.zeros((n, 3, 3), dtype=wp.float32, device=device_str))
+        self._update_intrinsic_matrices(list(range(n)))
+        self._data.pos_w = ProxyArray(wp.zeros(n, dtype=wp.vec3, device=device_str))
+        self._data.quat_w_world = ProxyArray(wp.zeros(n, dtype=wp.quatf, device=device_str))
+        self._update_poses_wp(self._ALL_INDICES)
         # Hand the renderer the underlying wp.array buffers; the ProxyArray
         # wrappers in CameraData stay valid since both views share memory.
         self._renderer.set_outputs(self._render_data, {name: proxy.warp for name, proxy in self._data.output.items()})
 
     def _update_intrinsic_matrices(self, env_ids: Sequence[int]):
-        """Compute camera's matrix of intrinsic parameters.
-
-        Also called calibration matrix. This matrix works for linear depth images. We assume square pixels.
+        """Compute camera's matrix of intrinsic parameters via a warp scatter kernel.
 
-        .. note::
-            The calibration matrix projects points in the 3D scene onto an imaginary screen of the camera.
-            The coordinates of points on the image plane are in the homogeneous representation.
+        The per-camera scalars are read from the USD prims (CPU-bound) and
+        scattered into the GPU buffer in a single launch.
         """
-        # zero-copy torch view of the wp.array buffer; in-place mutation is
-        # visible through both views.
-        intrinsic_view = self._data.intrinsic_matrices.torch
-        # iterate over all cameras
-        for i in env_ids:
-            # Get corresponding sensor prim
-            sensor_prim = self._sensor_prims[i]
-            # get camera parameters
+        if len(env_ids) == 0:
+            return
+        height, width = self.image_shape
+        fx_host = np.empty(len(env_ids), dtype=np.float32)
+        fy_host = np.empty(len(env_ids), dtype=np.float32)
+        cx_host = np.empty(len(env_ids), dtype=np.float32)
+        cy_host = np.empty(len(env_ids), dtype=np.float32)
+        idx_host = np.asarray(list(env_ids), dtype=np.int32)
+        for k, i in enumerate(env_ids):
+            sensor_prim = self._sensor_prims[int(i)]
             # currently rendering does not use aperture offsets or vertical aperture
             focal_length = sensor_prim.GetFocalLengthAttr().Get()
             horiz_aperture = sensor_prim.GetHorizontalApertureAttr().Get()
-
-            # get viewport parameters
-            height, width = self.image_shape
-            # extract intrinsic parameters
             f_x = (width * focal_length) / horiz_aperture
-            f_y = f_x
-            c_x = width * 0.5
-            c_y = height * 0.5
-            # create intrinsic matrix for depth linear
-            intrinsic_view[i, 0, 0] = f_x
-            intrinsic_view[i, 0, 2] = c_x
-            intrinsic_view[i, 1, 1] = f_y
-            intrinsic_view[i, 1, 2] = c_y
-            intrinsic_view[i, 2, 2] = 1
-
-    def _update_poses(self, env_ids: Sequence[int]):
-        """Computes the pose of the camera in the world frame with ROS convention.
-
-        This methods uses the ROS convention to resolve the input pose. In this convention,
-        we assume that the camera front-axis is +Z-axis and up-axis is -Y-axis.
-
-        Returns:
-            A tuple of the position (in meters) and quaternion (x, y, z, w).
+            fx_host[k] = f_x
+            fy_host[k] = f_x
+            cx_host[k] = width * 0.5
+            cy_host[k] = height * 0.5
+        device_str = str(self._device)
+        wp.launch(
+            camera_kernels.write_intrinsic_matrices_kernel,
+            dim=len(env_ids),
+            inputs=[
+                wp.array(idx_host, dtype=wp.int32, device=device_str),
+                wp.array(fx_host, dtype=wp.float32, device=device_str),
+                wp.array(fy_host, dtype=wp.float32, device=device_str),
+                wp.array(cx_host, dtype=wp.float32, device=device_str),
+                wp.array(cy_host, dtype=wp.float32, device=device_str),
+                self._data.intrinsic_matrices.warp,
+            ],
+            device=device_str,
+        )
+
+    def _update_poses_wp(self, env_ids_wp: wp.array):
+        """Pull camera poses from the view and write them into ``self._data`` (pure warp).
+
+        Args:
+            env_ids_wp: ``wp.int32`` array of camera indices to refresh, or
+                :attr:`_ALL_INDICES` to refresh every camera.
         """
-        # check camera prim exists
         if len(self._sensor_prims) == 0:
             raise RuntimeError("Camera prim is None. Please call 'sim.play()' first.")
 
-        # get the poses from the view (returns ProxyArray, use .torch for tensor access)
-        if env_ids is not None and not isinstance(env_ids, torch.Tensor):
-            env_ids = torch.tensor(env_ids, dtype=torch.int32, device=self._device)
-        indices = wp.from_torch(env_ids.to(dtype=torch.int32), dtype=wp.int32) if env_ids is not None else None
-        pos_w, quat_w = self._view.get_world_poses(indices)
-        # Mutate via the zero-copy torch view to avoid the ProxyArray.__setitem__
-        # deprecation warning (the view shares memory with the wp.array primary).
-        self._data.pos_w.torch[env_ids] = pos_w.torch
-        self._data.quat_w_world.torch[env_ids] = convert_camera_frame_orientation_convention(
-            quat_w.torch, origin="opengl", target="world"
+        pos_w_proxy, quat_w_proxy = self._view.get_world_poses(env_ids_wp)
+        # Convert quat_w (opengl) -> world via a warp kernel and scatter into CameraData.
+        n_pose = int(env_ids_wp.shape[0])
+        quat_w_world_local = convert_quat_array(quat_w_proxy.warp, origin="opengl", target="world")
+        # Build a temporary wp.bool mask covering env_ids_wp so we can scatter through
+        # the existing masked_set kernels.
+        mask_local = wp.zeros(self._view.count, dtype=wp.bool, device=self._device)
+        wp.launch(
+            camera_kernels.indices_to_mask_kernel,
+            dim=n_pose,
+            inputs=[env_ids_wp, mask_local],
+            device=self._device,
+        )
+        # Scatter pos and quat into self._data via masked copies.
+        # Note: the view's pos/quat arrays are already the same length as env_ids_wp,
+        # so we copy them into a full-sized scratch first via index, then mask-copy.
+        # For correctness, we instead scatter directly from the (M,) arrays using an indexed kernel.
+        wp.launch(
+            camera_kernels.scatter_vec3f_kernel,
+            dim=n_pose,
+            inputs=[env_ids_wp, pos_w_proxy.warp, self._data.pos_w.warp],
+            device=self._device,
+        )
+        wp.launch(
+            camera_kernels.scatter_quatf_kernel,
+            dim=n_pose,
+            inputs=[env_ids_wp, quat_w_world_local, self._data.quat_w_world.warp],
+            device=self._device,
         )
         # notify renderer of updated poses (guarded in case called before initialization completes)
         if self._render_data is not None:
@@ -627,6 +673,76 @@ def _update_poses(self, env_ids: Sequence[int]):
                 self._data.intrinsic_matrices.warp,
             )
 
+    # ---- env id / mask helpers ------------------------------------------------------------------
+
+    def _resolve_env_indices(self, env_ids: Sequence[int] | wp.array | ProxyArray | None) -> wp.array | None:
+        """Resolve user-supplied env_ids to a contiguous ``wp.int32`` array (or ``None``).
+
+        ``None`` is passed through so callers can short-circuit to the "all" branch.
+        """
+        if env_ids is None:
+            return None
+        if isinstance(env_ids, ProxyArray):
+            env_ids = env_ids.warp
+        if isinstance(env_ids, wp.array):
+            if env_ids.dtype is wp.int32:
+                return env_ids
+            return wp.array(env_ids.numpy().astype(np.int32, copy=False), dtype=wp.int32, device=self._device)
+        # Sequence of ints / numpy-array.
+        return wp.array(np.asarray(list(env_ids), dtype=np.int32), dtype=wp.int32, device=self._device)
+
+    def _resolve_env_ids_or_mask(
+        self,
+        env_ids: Sequence[int] | wp.array | ProxyArray | None,
+        env_mask: wp.array | None,
+    ) -> tuple[wp.array, wp.array]:
+        """Return both an int32 indices array and a bool mask covering the same envs."""
+        device_str = str(self._device)
+        if env_mask is not None and env_ids is None:
+            ids, _ = self._mask_to_env_ids(env_mask)
+            return ids, env_mask
+        if env_ids is None:
+            mask = wp.array(np.ones(self._view.count, dtype=np.bool_), dtype=wp.bool, device=device_str)
+            return self._ALL_INDICES, mask
+        ids = self._resolve_env_indices(env_ids)
+        mask = wp.zeros(self._view.count, dtype=wp.bool, device=device_str)
+        wp.launch(
+            camera_kernels.indices_to_mask_kernel,
+            dim=int(ids.shape[0]),
+            inputs=[ids, mask],
+            device=device_str,
+        )
+        return ids, mask
+
+    def _mask_to_env_ids(self, env_mask: wp.array) -> tuple[wp.array, int]:
+        """Compact a ``wp.bool`` mask into a contiguous ``wp.int32`` indices array.
+
+        Returns ``(indices, count)`` where ``indices`` is a freshly-allocated
+        slice over the populated prefix of the per-step scratch buffer.
+        """
+        device_str = str(self._device)
+        # Reset counter; reuse the persistent scratch buffer.
+        self._env_indices_count.zero_()
+        wp.launch(
+            camera_kernels.mask_to_indices_kernel,
+            dim=int(env_mask.shape[0]),
+            inputs=[env_mask, self._env_indices_scratch, self._env_indices_count],
+            device=device_str,
+        )
+        count = int(self._env_indices_count.numpy()[0])
+        if count == 0:
+            return wp.empty(0, dtype=wp.int32, device=device_str), 0
+        # Sub-view over the populated prefix; non-owning so the caller mustn't
+        # mutate the scratch buffer between calls.
+        sub = wp.array(
+            ptr=self._env_indices_scratch.ptr,
+            dtype=wp.int32,
+            shape=(count,),
+            device=self._env_indices_scratch.device,
+            copy=False,
+        )
+        return sub, count
+
     """
     Internal simulation callbacks.
     """
diff --git a/source/isaaclab/isaaclab/sensors/camera/camera_data.py b/source/isaaclab/isaaclab/sensors/camera/camera_data.py
index 40595e40d1c5..01eb7bacb44e 100644
--- a/source/isaaclab/isaaclab/sensors/camera/camera_data.py
+++ b/source/isaaclab/isaaclab/sensors/camera/camera_data.py
@@ -8,20 +8,15 @@
 from dataclasses import dataclass
 from typing import Any
 
-import torch
 import warp as wp
 
 # Re-exported as part of the public isaaclab.sensors.camera API
 from isaaclab.renderers.output_contract import RenderBufferKind, RenderBufferSpec
-from isaaclab.utils.math import convert_camera_frame_orientation_convention
 from isaaclab.utils.warp.proxy_array import ProxyArray
 
-__all__ = ["CameraData", "RenderBufferKind", "RenderBufferSpec"]
-
+from .orientation_conventions import convert_quat_array
 
-def _wrap(buf: torch.Tensor) -> ProxyArray:
-    """Wrap a torch buffer as a :class:`ProxyArray` with a zero-copy ``wp.array`` view."""
-    return ProxyArray(wp.from_torch(buf))
+__all__ = ["CameraData", "RenderBufferKind", "RenderBufferSpec"]
 
 
 @dataclass
@@ -89,16 +84,21 @@ def allocate(
         height: int,
         width: int,
         num_views: int,
-        device: torch.device | str,
+        device: str,
         supported_specs: dict[RenderBufferKind, RenderBufferSpec],
     ) -> CameraData:
         """Build a :class:`CameraData` with output buffers pre-allocated.
 
-        Allocates one ``(num_views, height, width, channels)`` buffer per kind
-        in the intersection of ``data_types`` and ``supported_specs``, using
-        the channels and dtype from each :class:`RenderBufferSpec`. Each buffer
-        is exposed as a :class:`ProxyArray` (``wp.array`` primary, zero-copy
-        ``torch.Tensor`` view via ``.torch``).
+        Allocates one ``(num_views, height, width, channels)`` :class:`warp.array`
+        per kind in the intersection of ``data_types`` and ``supported_specs``,
+        using the channels and dtype from each :class:`RenderBufferSpec`. Each
+        buffer is exposed as a :class:`ProxyArray` (``wp.array`` primary,
+        zero-copy ``torch.Tensor`` view via ``.torch``).
+
+        When the renderer publishes both ``"rgb"`` and ``"rgba"`` and the user
+        requests either, only the ``"rgba"`` buffer is allocated; the ``"rgb"``
+        entry is a non-owning :class:`warp.array` view over the first three
+        channels of the same memory.
 
         Args:
             data_types: Requested output names (typically :attr:`CameraCfg.data_types`).
@@ -106,7 +106,7 @@ def allocate(
             height: Image height in pixels.
             width: Image width in pixels.
             num_views: Number of camera views (batch dimension).
-            device: Torch device on which to allocate the buffers.
+            device: Warp device string on which to allocate the buffers.
             supported_specs: Per-buffer layout the active renderer can produce,
                 keyed by :class:`RenderBufferKind`. Names absent from this mapping
                 are not allocated.
@@ -128,8 +128,9 @@ def allocate(
                 unknown.append(name)
         if unknown:
             raise ValueError(f"Unknown RenderBufferKind name(s): {unknown}. Expected members of RenderBufferKind.")
-        # rgb is exposed as a view into rgba when the renderer publishes both,
-        # so requesting either one allocates the shared rgba buffer.
+        # rgb is exposed as a non-owning wp.array view over the rgba buffer when
+        # the renderer publishes both, so requesting either one allocates the
+        # shared rgba buffer.
         rgb_alias = (
             RenderBufferKind.RGBA in supported_specs
             and RenderBufferKind.RGB in supported_specs
@@ -138,28 +139,32 @@ def allocate(
         if rgb_alias:
             requested.update({RenderBufferKind.RGB, RenderBufferKind.RGBA})
 
-        # Allocate the underlying torch buffers first so we can preserve the
-        # rgb-as-view-of-rgba alias as a torch slice (shared memory). Each
-        # buffer is then wrapped as a ProxyArray over a wp.from_torch view —
-        # the wp.array and torch.Tensor share the same allocation.
-        torch_buffers: dict[str, torch.Tensor] = {}
+        device_str = str(device)
+        buffers: dict[str, ProxyArray] = {}
         for name, spec in supported_specs.items():
             if name not in requested:
                 continue
             if rgb_alias and name == RenderBufferKind.RGB:
                 continue
-            torch_buffers[str(name)] = torch.zeros(
-                (num_views, height, width, spec.channels),
-                dtype=spec.dtype,
-                device=device,
-            ).contiguous()
+            arr = wp.zeros((num_views, height, width, spec.channels), dtype=spec.dtype, device=device_str)
+            buffers[str(name)] = ProxyArray(arr)
         if rgb_alias:
-            # Share storage with rgba: the slice is a non-contiguous torch view.
-            # ``wp.from_torch`` accepts strided views, so the resulting ProxyArray
-            # tracks the same underlying memory as the rgba buffer.
-            torch_buffers[str(RenderBufferKind.RGB)] = torch_buffers[str(RenderBufferKind.RGBA)][..., :3]
-
-        buffers: dict[str, ProxyArray] = {name: _wrap(buf) for name, buf in torch_buffers.items()}
+            rgba_arr = buffers[str(RenderBufferKind.RGBA)].warp
+            rgba_spec = supported_specs[RenderBufferKind.RGBA]
+            rgb_channels = supported_specs[RenderBufferKind.RGB].channels
+            # Construct a non-owning wp.array view that shares storage with rgba
+            # but reports the rgb channel count on the last axis. Strides are
+            # inherited from the contiguous rgba allocation, leaving the rgb
+            # view non-contiguous (last axis stride > element size).
+            rgb_view = wp.array(
+                ptr=rgba_arr.ptr,
+                dtype=rgba_spec.dtype,
+                shape=(num_views, height, width, rgb_channels),
+                strides=rgba_arr.strides,
+                device=rgba_arr.device,
+                copy=False,
+            )
+            buffers[str(RenderBufferKind.RGB)] = ProxyArray(rgb_view)
 
         return cls(
             image_shape=(height, width),
@@ -180,8 +185,7 @@ def quat_w_ros(self) -> ProxyArray:
 
         Shape is (N, 4) where N is the number of sensors.
         """
-        converted = convert_camera_frame_orientation_convention(self.quat_w_world.torch, origin="world", target="ros")
-        return _wrap(converted.contiguous())
+        return ProxyArray(convert_quat_array(self.quat_w_world.warp, origin="world", target="ros"))
 
     @property
     def quat_w_opengl(self) -> ProxyArray:
@@ -193,7 +197,4 @@ def quat_w_opengl(self) -> ProxyArray:
 
         Shape is (N, 4) where N is the number of sensors.
         """
-        converted = convert_camera_frame_orientation_convention(
-            self.quat_w_world.torch, origin="world", target="opengl"
-        )
-        return _wrap(converted.contiguous())
+        return ProxyArray(convert_quat_array(self.quat_w_world.warp, origin="world", target="opengl"))
diff --git a/source/isaaclab/isaaclab/sensors/camera/kernels.py b/source/isaaclab/isaaclab/sensors/camera/kernels.py
new file mode 100644
index 000000000000..4a94fff0ea66
--- /dev/null
+++ b/source/isaaclab/isaaclab/sensors/camera/kernels.py
@@ -0,0 +1,318 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Warp kernels backing the standard :class:`~isaaclab.sensors.camera.Camera`.
+
+Each kernel keeps the camera's runtime fully Warp-native: there is no torch
+involvement in the per-step pose / intrinsic / frame-counter updates.
+"""
+
+from __future__ import annotations
+
+import warp as wp
+
+
+@wp.kernel
+def masked_increment_int64_kernel(
+    mask: wp.array(dtype=wp.bool),
+    counter: wp.array(dtype=wp.int64),
+):
+    """Increment ``counter[i]`` by one for every ``i`` where ``mask[i]`` is true."""
+    tid = wp.tid()
+    if mask[tid]:
+        counter[tid] = counter[tid] + wp.int64(1)
+
+
+@wp.kernel
+def masked_set_int64_kernel(
+    mask: wp.array(dtype=wp.bool),
+    value: wp.int64,
+    target: wp.array(dtype=wp.int64),
+):
+    """Overwrite ``target[i]`` with ``value`` for every ``i`` where ``mask[i]`` is true."""
+    tid = wp.tid()
+    if mask[tid]:
+        target[tid] = value
+
+
+@wp.kernel
+def masked_set_vec3f_kernel(
+    mask: wp.array(dtype=wp.bool),
+    src: wp.array(dtype=wp.vec3f),
+    dst: wp.array(dtype=wp.vec3f),
+):
+    """For every ``i`` where ``mask[i]`` is true, assign ``dst[i] = src[i]``."""
+    tid = wp.tid()
+    if mask[tid]:
+        dst[tid] = src[tid]
+
+
+@wp.kernel
+def masked_set_quatf_kernel(
+    mask: wp.array(dtype=wp.bool),
+    src: wp.array(dtype=wp.quatf),
+    dst: wp.array(dtype=wp.quatf),
+):
+    """For every ``i`` where ``mask[i]`` is true, assign ``dst[i] = src[i]``."""
+    tid = wp.tid()
+    if mask[tid]:
+        dst[tid] = src[tid]
+
+
+@wp.kernel
+def write_intrinsic_matrices_kernel(
+    env_ids: wp.array(dtype=wp.int32),
+    fx: wp.array(dtype=wp.float32),
+    fy: wp.array(dtype=wp.float32),
+    cx: wp.array(dtype=wp.float32),
+    cy: wp.array(dtype=wp.float32),
+    target: wp.array(dtype=wp.float32, ndim=3),
+):
+    """Scatter intrinsic-matrix scalars (``f_x``, ``f_y``, ``c_x``, ``c_y``, 1.0)
+    into ``target[env_ids[t], :, :]`` for ``t`` in ``[0, env_ids.shape[0])``.
+    """
+    tid = wp.tid()
+    i = env_ids[tid]
+    target[i, 0, 0] = fx[tid]
+    target[i, 0, 2] = cx[tid]
+    target[i, 1, 1] = fy[tid]
+    target[i, 1, 2] = cy[tid]
+    target[i, 2, 2] = wp.float32(1.0)
+
+
+@wp.kernel
+def vec3f_view_to_torch_layout_kernel(
+    src: wp.array(dtype=wp.vec3f),
+    dst: wp.array(dtype=wp.float32, ndim=2),
+):
+    """Copy from a ``(N,)`` array of ``wp.vec3f`` into a ``(N, 3)`` float32 array."""
+    tid = wp.tid()
+    v = src[tid]
+    dst[tid, 0] = v[0]
+    dst[tid, 1] = v[1]
+    dst[tid, 2] = v[2]
+
+
+@wp.kernel
+def quatf_view_to_torch_layout_kernel(
+    src: wp.array(dtype=wp.quatf),
+    dst: wp.array(dtype=wp.float32, ndim=2),
+):
+    """Copy from a ``(N,)`` array of ``wp.quatf`` into a ``(N, 4)`` float32 array."""
+    tid = wp.tid()
+    q = src[tid]
+    dst[tid, 0] = q[0]
+    dst[tid, 1] = q[1]
+    dst[tid, 2] = q[2]
+    dst[tid, 3] = q[3]
+
+
+@wp.kernel
+def torch_layout_to_vec3f_kernel(
+    src: wp.array(dtype=wp.float32, ndim=2),
+    dst: wp.array(dtype=wp.vec3f),
+):
+    """Copy from a ``(N, 3)`` float32 array into a ``(N,)`` ``wp.vec3f`` array."""
+    tid = wp.tid()
+    dst[tid] = wp.vec3f(src[tid, 0], src[tid, 1], src[tid, 2])
+
+
+@wp.kernel
+def torch_layout_to_quatf_kernel(
+    src: wp.array(dtype=wp.float32, ndim=2),
+    dst: wp.array(dtype=wp.quatf),
+):
+    """Copy from a ``(N, 4)`` float32 array into a ``(N,)`` ``wp.quatf`` array."""
+    tid = wp.tid()
+    dst[tid] = wp.quatf(src[tid, 0], src[tid, 1], src[tid, 2], src[tid, 3])
+
+
+@wp.kernel
+def mask_to_indices_kernel(
+    mask: wp.array(dtype=wp.bool),
+    indices: wp.array(dtype=wp.int32),
+    counter: wp.array(dtype=wp.int32),
+):
+    """Compact the indices where ``mask[i]`` is true into ``indices[0..counter[0])``.
+
+    ``counter`` must be zero-initialized before launch. ``indices`` must have
+    capacity at least ``mask.shape[0]``.
+    """
+    tid = wp.tid()
+    if mask[tid]:
+        idx = wp.atomic_add(counter, 0, 1)
+        indices[idx] = wp.int32(tid)
+
+
+@wp.kernel
+def indices_to_mask_kernel(
+    indices: wp.array(dtype=wp.int32),
+    mask: wp.array(dtype=wp.bool),
+):
+    """For each ``i`` in ``indices``, set ``mask[indices[i]] = True``.
+
+    ``mask`` must be zero-initialized before launch.
+    """
+    tid = wp.tid()
+    mask[indices[tid]] = True
+
+
+@wp.kernel
+def scatter_vec3f_kernel(
+    indices: wp.array(dtype=wp.int32),
+    src: wp.array(dtype=wp.vec3),
+    dst: wp.array(dtype=wp.vec3),
+):
+    """``dst[indices[t]] = src[t]`` for ``t`` in ``[0, indices.shape[0])``."""
+    tid = wp.tid()
+    dst[indices[tid]] = src[tid]
+
+
+@wp.kernel
+def scatter_quatf_kernel(
+    indices: wp.array(dtype=wp.int32),
+    src: wp.array(dtype=wp.quatf),
+    dst: wp.array(dtype=wp.quatf),
+):
+    """``dst[indices[t]] = src[t]`` for ``t`` in ``[0, indices.shape[0])``."""
+    tid = wp.tid()
+    dst[indices[tid]] = src[tid]
+
+
+@wp.kernel
+def combine_frame_transforms_kernel(
+    p1: wp.array(dtype=wp.vec3),
+    q1: wp.array(dtype=wp.quatf),
+    p2: wp.array(dtype=wp.vec3),
+    q2: wp.array(dtype=wp.quatf),
+    out_p: wp.array(dtype=wp.vec3),
+    out_q: wp.array(dtype=wp.quatf),
+):
+    """Compose two rigid transforms: ``out = T1 ∘ T2`` (parent ∘ child).
+
+    Mirrors :func:`isaaclab.utils.math.combine_frame_transforms`.
+    """
+    tid = wp.tid()
+    out_p[tid] = p1[tid] + wp.quat_rotate(q1[tid], p2[tid])
+    out_q[tid] = q1[tid] * q2[tid]
+
+
+@wp.kernel
+def gather_vec3f_kernel(
+    indices: wp.array(dtype=wp.int32),
+    src: wp.array(dtype=wp.vec3),
+    dst: wp.array(dtype=wp.vec3),
+):
+    """``dst[t] = src[indices[t]]`` for ``t`` in ``[0, indices.shape[0])``."""
+    tid = wp.tid()
+    dst[tid] = src[indices[tid]]
+
+
+@wp.kernel
+def gather_quatf_kernel(
+    indices: wp.array(dtype=wp.int32),
+    src: wp.array(dtype=wp.quatf),
+    dst: wp.array(dtype=wp.quatf),
+):
+    """``dst[t] = src[indices[t]]`` for ``t`` in ``[0, indices.shape[0])``."""
+    tid = wp.tid()
+    dst[tid] = src[indices[tid]]
+
+
+@wp.kernel
+def quat_inverse_apply_kernel(
+    q: wp.array(dtype=wp.quatf),
+    v: wp.array(dtype=wp.vec3),
+    out: wp.array(dtype=wp.vec3),
+):
+    """``out[i] = quat_rotate(quat_inverse(q[i]), v[i])`` — rotate ``v`` by the inverse of ``q``."""
+    tid = wp.tid()
+    out[tid] = wp.quat_rotate_inv(q[tid], v[tid])
+
+
+@wp.kernel
+def quat_inverse_multiply_kernel(
+    q1: wp.array(dtype=wp.quatf),
+    q2: wp.array(dtype=wp.quatf),
+    out: wp.array(dtype=wp.quatf),
+):
+    """``out[i] = quat_inverse(q1[i]) * q2[i]``."""
+    tid = wp.tid()
+    out[tid] = wp.quat_inverse(q1[tid]) * q2[tid]
+
+
+@wp.kernel
+def vec3_subtract_kernel(
+    a: wp.array(dtype=wp.vec3),
+    b: wp.array(dtype=wp.vec3),
+    out: wp.array(dtype=wp.vec3),
+):
+    """``out[i] = a[i] - b[i]``."""
+    tid = wp.tid()
+    out[tid] = a[tid] - b[tid]
+
+
+@wp.kernel
+def look_at_quat_kernel(
+    eyes: wp.array(dtype=wp.vec3),
+    targets: wp.array(dtype=wp.vec3),
+    up_world: wp.vec3,
+    out: wp.array(dtype=wp.quatf),
+):
+    """For each camera ``i``, compute the OpenGL-convention look-at quaternion that
+    points the camera from ``eyes[i]`` toward ``targets[i]`` with the supplied
+    world up-axis. The result is stored in ``out[i]`` as ``wp.quatf`` (x, y, z, w).
+
+    Mirrors :func:`isaaclab.utils.math.create_rotation_matrix_from_view` followed by
+    :func:`isaaclab.utils.math.quat_from_matrix` so external behavior is preserved
+    while the implementation remains pure warp.
+    """
+    tid = wp.tid()
+    # OpenGL camera convention: forward = -Z, up = +Y, right = +X.
+    forward = wp.normalize(targets[tid] - eyes[tid])
+    right = wp.normalize(wp.cross(forward, up_world))
+    cam_up = wp.cross(right, forward)
+    # Rotation matrix columns: (right, up, -forward)
+    m00 = right[0]
+    m01 = cam_up[0]
+    m02 = -forward[0]
+    m10 = right[1]
+    m11 = cam_up[1]
+    m12 = -forward[1]
+    m20 = right[2]
+    m21 = cam_up[2]
+    m22 = -forward[2]
+    # Convert to quaternion (xyzw) using the pytorch3d-style algorithm.
+    # 4 candidate q_abs^2 components: (4w^2, 4x^2, 4y^2, 4z^2).
+    qaw = wp.sqrt(wp.max(0.0, 1.0 + m00 + m11 + m22))
+    qax = wp.sqrt(wp.max(0.0, 1.0 + m00 - m11 - m22))
+    qay = wp.sqrt(wp.max(0.0, 1.0 - m00 + m11 - m22))
+    qaz = wp.sqrt(wp.max(0.0, 1.0 - m00 - m11 + m22))
+    # Pick the largest one and recover signed components from off-diagonal entries.
+    if qaw >= qax and qaw >= qay and qaw >= qaz:
+        denom = 2.0 * wp.max(qaw, 0.1)
+        qx = (m21 - m12) / denom
+        qy = (m02 - m20) / denom
+        qz = (m10 - m01) / denom
+        qw = qaw * qaw / denom
+    elif qax >= qay and qax >= qaz:
+        denom = 2.0 * wp.max(qax, 0.1)
+        qx = qax * qax / denom
+        qy = (m10 + m01) / denom
+        qz = (m02 + m20) / denom
+        qw = (m21 - m12) / denom
+    elif qay >= qaz:
+        denom = 2.0 * wp.max(qay, 0.1)
+        qx = (m10 + m01) / denom
+        qy = qay * qay / denom
+        qz = (m12 + m21) / denom
+        qw = (m02 - m20) / denom
+    else:
+        denom = 2.0 * wp.max(qaz, 0.1)
+        qx = (m20 + m02) / denom
+        qy = (m21 + m12) / denom
+        qz = qaz * qaz / denom
+        qw = (m10 - m01) / denom
+    out[tid] = wp.quatf(qx, qy, qz, qw)
diff --git a/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py b/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py
new file mode 100644
index 000000000000..b9ed24778e98
--- /dev/null
+++ b/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py
@@ -0,0 +1,240 @@
+# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""Warp-only camera orientation convention conversions.
+
+Mirrors :func:`isaaclab.utils.math.convert_camera_frame_orientation_convention` for
+``wp.array(dtype=wp.quatf)`` inputs without involving torch. Each conversion is a
+fixed quaternion post-multiplication, with the convention quaternions baked in at
+import time from a small numpy computation.
+"""
+
+from __future__ import annotations
+
+import math
+from typing import Literal
+
+import numpy as np
+import warp as wp
+
+# --- Conversion quaternion derivation ---------------------------------------------------------
+#
+# The torch reference (``isaaclab.utils.math.convert_camera_frame_orientation_convention``)
+# expresses each conversion as a 3x3 rotation matrix multiplied on the right of the source
+# rotation matrix. Equivalently, in quaternion form ``q_target = q_src * q_conv``.
+#
+# Conventions (each describing the camera local axes in their frame):
+#   * opengl: forward = -Z, up = +Y
+#   * ros:    forward = +Z, up = -Y     (180 deg rotation around X relative to opengl)
+#   * world:  forward = +X, up = +Z
+#
+# All quaternions are stored in (x, y, z, w) order to match :class:`warp.quatf`.
+
+
+def _euler_xyz_intrinsic_to_matrix(rx: float, ry: float, rz: float) -> np.ndarray:
+    """Intrinsic XYZ Euler angles to a 3x3 rotation matrix (R = Rx @ Ry @ Rz)."""
+    cx, sx = math.cos(rx), math.sin(rx)
+    cy, sy = math.cos(ry), math.sin(ry)
+    cz, sz = math.cos(rz), math.sin(rz)
+    rxm = np.array([[1.0, 0.0, 0.0], [0.0, cx, -sx], [0.0, sx, cx]])
+    rym = np.array([[cy, 0.0, sy], [0.0, 1.0, 0.0], [-sy, 0.0, cy]])
+    rzm = np.array([[cz, -sz, 0.0], [sz, cz, 0.0], [0.0, 0.0, 1.0]])
+    return rxm @ rym @ rzm
+
+
+def _matrix_to_quat_xyzw(rotm: np.ndarray) -> tuple[float, float, float, float]:
+    """Convert a 3x3 rotation matrix to a quaternion ``(x, y, z, w)``.
+
+    Mirrors :func:`isaaclab.utils.math.quat_from_matrix` (the pytorch3d
+    algorithm) so the sign convention matches the torch reference for any
+    given input.
+    """
+    m00, m01, m02 = float(rotm[0, 0]), float(rotm[0, 1]), float(rotm[0, 2])
+    m10, m11, m12 = float(rotm[1, 0]), float(rotm[1, 1]), float(rotm[1, 2])
+    m20, m21, m22 = float(rotm[2, 0]), float(rotm[2, 1]), float(rotm[2, 2])
+
+    q_abs_sq = (
+        1.0 + m00 + m11 + m22,  # 4 * w^2
+        1.0 + m00 - m11 - m22,  # 4 * x^2
+        1.0 - m00 + m11 - m22,  # 4 * y^2
+        1.0 - m00 - m11 + m22,  # 4 * z^2
+    )
+    q_abs = tuple(math.sqrt(max(0.0, v)) for v in q_abs_sq)
+
+    # Candidate (x, y, z, w) tuples — one per branch.
+    candidates = (
+        (m21 - m12, m02 - m20, m10 - m01, q_abs[0] ** 2),
+        (q_abs[1] ** 2, m10 + m01, m02 + m20, m21 - m12),
+        (m10 + m01, q_abs[2] ** 2, m12 + m21, m02 - m20),
+        (m20 + m02, m21 + m12, q_abs[3] ** 2, m10 - m01),
+    )
+
+    # Pick the branch with the largest q_abs (best-conditioned). Match the
+    # torch reference's denominator floor of 0.1 to keep the algorithm bit-equivalent.
+    idx = max(range(4), key=lambda i: q_abs[i])
+    cand = candidates[idx]
+    denom = 2.0 * max(q_abs[idx], 0.1)
+    return cand[0] / denom, cand[1] / denom, cand[2] / denom, cand[3] / denom
+
+
+# 180 deg rotation around X axis: q = (1, 0, 0, 0) in (x, y, z, w). 180-deg rotations are
+# self-inverse, so the same quaternion serves both directions for ros<->opengl.
+_Q_FLIP_YZ = (1.0, 0.0, 0.0, 0.0)
+# The torch reference (``isaaclab.utils.math.convert_camera_frame_orientation_convention``)
+# post-multiplies by matrix_from_euler([pi/2, -pi/2, 0], "XYZ") for world->opengl and by
+# the *transpose* of the same matrix for opengl->world. As a quaternion, the transpose of
+# a unit rotation matrix is the inverse — i.e. the conjugate of the corresponding quat.
+_Q_WORLD_TO_OPENGL = _matrix_to_quat_xyzw(_euler_xyz_intrinsic_to_matrix(math.pi / 2, -math.pi / 2, 0.0))
+_Q_OPENGL_TO_WORLD = (
+    -_Q_WORLD_TO_OPENGL[0],
+    -_Q_WORLD_TO_OPENGL[1],
+    -_Q_WORLD_TO_OPENGL[2],
+    _Q_WORLD_TO_OPENGL[3],
+)
+
+
+def _identity() -> tuple[float, float, float, float]:
+    return 0.0, 0.0, 0.0, 1.0
+
+
+def _quat_mul_xyzw(
+    a: tuple[float, float, float, float], b: tuple[float, float, float, float]
+) -> tuple[float, float, float, float]:
+    """Hamilton product on (x, y, z, w) quaternions, returning a * b."""
+    ax, ay, az, aw = a
+    bx, by, bz, bw = b
+    return (
+        aw * bx + ax * bw + ay * bz - az * by,
+        aw * by - ax * bz + ay * bw + az * bx,
+        aw * bz + ax * by - ay * bx + az * bw,
+        aw * bw - ax * bx - ay * by - az * bz,
+    )
+
+
+def _resolve_conversion_quat(
+    origin: Literal["opengl", "ros", "world"],
+    target: Literal["opengl", "ros", "world"],
+) -> tuple[float, float, float, float]:
+    """Return the right-multiplication quaternion that takes ``origin`` to ``target``."""
+    if origin == target:
+        return _identity()
+
+    # First take origin -> opengl on the right.
+    if origin == "ros":
+        to_opengl = _Q_FLIP_YZ
+    elif origin == "world":
+        to_opengl = _Q_WORLD_TO_OPENGL
+    else:  # origin == "opengl"
+        to_opengl = _identity()
+
+    # Then take opengl -> target on the right.
+    if target == "ros":
+        from_opengl = _Q_FLIP_YZ
+    elif target == "world":
+        from_opengl = _Q_OPENGL_TO_WORLD
+    else:  # target == "opengl"
+        from_opengl = _identity()
+
+    # Combined right-multiplier: q_target = q_src * to_opengl * from_opengl
+    return _quat_mul_xyzw(to_opengl, from_opengl)
+
+
+@wp.kernel
+def _quat_post_multiply_kernel(
+    src: wp.array(dtype=wp.quatf),
+    qx: float,
+    qy: float,
+    qz: float,
+    qw: float,
+    dst: wp.array(dtype=wp.quatf),
+):
+    """For each i, dst[i] = src[i] * (qx, qy, qz, qw)."""
+    tid = wp.tid()
+    s = src[tid]
+    sx = s[0]
+    sy = s[1]
+    sz = s[2]
+    sw = s[3]
+    # Hamilton product (x, y, z, w) order.
+    rx = sw * qx + sx * qw + sy * qz - sz * qy
+    ry = sw * qy - sx * qz + sy * qw + sz * qx
+    rz = sw * qz + sx * qy - sy * qx + sz * qw
+    rw = sw * qw - sx * qx - sy * qy - sz * qz
+    dst[tid] = wp.quatf(rx, ry, rz, rw)
+
+
+@wp.kernel
+def _quat_xyzw_post_multiply_kernel(
+    src: wp.array(dtype=wp.float32, ndim=2),
+    qx: float,
+    qy: float,
+    qz: float,
+    qw: float,
+    dst: wp.array(dtype=wp.float32, ndim=2),
+):
+    """Variant for ``(N, 4)`` float arrays storing (x, y, z, w) per row."""
+    tid = wp.tid()
+    sx = src[tid, 0]
+    sy = src[tid, 1]
+    sz = src[tid, 2]
+    sw = src[tid, 3]
+    dst[tid, 0] = sw * qx + sx * qw + sy * qz - sz * qy
+    dst[tid, 1] = sw * qy - sx * qz + sy * qw + sz * qx
+    dst[tid, 2] = sw * qz + sx * qy - sy * qx + sz * qw
+    dst[tid, 3] = sw * qw - sx * qx - sy * qy - sz * qz
+
+
+def convert_quat_array(
+    src: wp.array,
+    origin: Literal["opengl", "ros", "world"] = "opengl",
+    target: Literal["opengl", "ros", "world"] = "ros",
+) -> wp.array:
+    """Convert a quaternion array from ``origin`` to ``target`` camera convention.
+
+    Args:
+        src: Source quaternions. Either ``wp.array(dtype=wp.quatf, shape=(N,))`` or
+            ``wp.array(dtype=wp.float32, shape=(N, 4))`` with ``(x, y, z, w)`` rows.
+        origin: Source convention.
+        target: Target convention.
+
+    Returns:
+        A freshly-allocated :class:`warp.array` of the same dtype/shape with the
+        converted quaternions.
+    """
+    qx, qy, qz, qw = _resolve_conversion_quat(origin, target)
+    if origin == target:
+        # Defensive copy so callers can mutate the result without touching ``src``.
+        out = wp.empty_like(src)
+        wp.copy(out, src)
+        return out
+
+    if src.dtype is wp.quatf:
+        n = int(src.shape[0])
+        dst = wp.empty(n, dtype=wp.quatf, device=src.device)
+        wp.launch(
+            _quat_post_multiply_kernel,
+            dim=n,
+            inputs=[src, float(qx), float(qy), float(qz), float(qw)],
+            outputs=[dst],
+            device=src.device,
+        )
+        return dst
+
+    if src.dtype is wp.float32 and len(src.shape) == 2 and src.shape[1] == 4:
+        n = int(src.shape[0])
+        dst = wp.empty((n, 4), dtype=wp.float32, device=src.device)
+        wp.launch(
+            _quat_xyzw_post_multiply_kernel,
+            dim=n,
+            inputs=[src, float(qx), float(qy), float(qz), float(qw)],
+            outputs=[dst],
+            device=src.device,
+        )
+        return dst
+
+    raise TypeError(
+        f"convert_quat_array: unsupported source array (dtype={src.dtype}, shape={src.shape})."
+        " Expected wp.quatf shape (N,) or wp.float32 shape (N, 4)."
+    )
diff --git a/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py b/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py
index 56a8078c78b1..e0849b27abe2 100644
--- a/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py
+++ b/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py
@@ -37,11 +37,12 @@
 logger = logging.getLogger(__name__)
 
 
-def _to_device_tensor(value: wp.array | ProxyArray | torch.Tensor, device: str | torch.device) -> torch.Tensor:
-    """Normalize an array-like input to a ``torch.Tensor`` on ``device``.
+def _as_torch_tensor(value: wp.array | ProxyArray | torch.Tensor, device: str | torch.device) -> torch.Tensor:
+    """Bring a wp/ProxyArray/torch input to a contiguous ``torch.Tensor`` on ``device``.
 
-    Accepts :class:`warp.array`, :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`,
-    and :class:`torch.Tensor`. Warp inputs are unwrapped via ``wp.to_torch`` (zero-copy).
+    This helper exists only at the boundary where this class still calls into the
+    torch-based ``isaaclab.utils.math`` quaternion / pose utilities. The returned
+    tensor is a zero-copy view of the underlying warp memory whenever possible.
     """
     if isinstance(value, ProxyArray):
         value = value.torch
@@ -169,7 +170,7 @@ def set_intrinsic_matrices(
         if env_ids is None:
             env_ids = slice(None)
         # normalize matrices input to torch on sensor device
-        matrices = _to_device_tensor(matrices, self._device)
+        matrices = _as_torch_tensor(matrices, self._device)
         # save new intrinsic matrices and focal length (write via the zero-copy torch view)
         intrinsic_view = self._data.intrinsic_matrices.torch
         intrinsic_view[env_ids] = matrices
@@ -251,12 +252,12 @@ def set_world_poses(
         pos_w_torch = pos_w.torch
         quat_w_torch = quat_w.torch
         if positions is not None:
-            positions = _to_device_tensor(positions, self._device)
+            positions = _as_torch_tensor(positions, self._device)
             # transform to camera frame
             pos_offset_world_frame = positions - pos_w_torch
             self._offset_pos[env_ids] = math_utils.quat_apply(math_utils.quat_inv(quat_w_torch), pos_offset_world_frame)
         if orientations is not None:
-            orientations = _to_device_tensor(orientations, self._device)
+            orientations = _as_torch_tensor(orientations, self._device)
             # convert rotation matrix from input convention to world
             quat_w_set = math_utils.convert_camera_frame_orientation_convention(
                 orientations, origin=convention, target="world"
@@ -291,8 +292,8 @@ def set_world_poses_from_view(
             RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first.
             NotImplementedError: If the stage up-axis is not "Y" or "Z".
         """
-        eyes = _to_device_tensor(eyes, self._device)
-        targets = _to_device_tensor(targets, self._device)
+        eyes = _as_torch_tensor(eyes, self._device)
+        targets = _as_torch_tensor(targets, self._device)
         # get up axis of current stage
         up_axis = UsdGeom.GetStageUpAxis(self.stage)
         # camera position and rotation in opengl convention
diff --git a/source/isaaclab/isaaclab/utils/warp/kernels.py b/source/isaaclab/isaaclab/utils/warp/kernels.py
index efcdbfe63f1e..c656e0f0e996 100644
--- a/source/isaaclab/isaaclab/utils/warp/kernels.py
+++ b/source/isaaclab/isaaclab/utils/warp/kernels.py
@@ -326,6 +326,32 @@ def raycast_dynamic_meshes_kernel(
                 ray_mesh_id[tid_env, tid_ray] = wp.int16(tid_mesh_id)
 
 
+@wp.kernel
+def clamp_above_to_inf_kernel(
+    threshold: wp.float32,
+    target: wp.array(dtype=wp.float32, ndim=4),
+):
+    """For every element ``v`` of ``target``, set ``v = +inf`` when ``v > threshold``.
+
+    Used by camera renderers to enforce that out-of-range depth-style outputs
+    propagate as ``inf`` regardless of the upstream clipping convention.
+    """
+    n, h, w, c = wp.tid()
+    if target[n, h, w, c] > threshold:
+        target[n, h, w, c] = wp.float32(wp.inf)
+
+
+@wp.kernel
+def replace_inf_kernel(
+    replacement: wp.float32,
+    target: wp.array(dtype=wp.float32, ndim=4),
+):
+    """For every element ``v`` of ``target``, set ``v = replacement`` when ``isinf(v)``."""
+    n, h, w, c = wp.tid()
+    if wp.isinf(target[n, h, w, c]):
+        target[n, h, w, c] = replacement
+
+
 @wp.kernel(enable_backward=False)
 def reshape_tiled_image(
     tiled_image_buffer: Any,
diff --git a/source/isaaclab/test/renderers/test_camera_output_contract.py b/source/isaaclab/test/renderers/test_camera_output_contract.py
index 208a814a796a..d8f26fa35058 100644
--- a/source/isaaclab/test/renderers/test_camera_output_contract.py
+++ b/source/isaaclab/test/renderers/test_camera_output_contract.py
@@ -9,6 +9,7 @@
 
 import pytest
 import torch
+import warp as wp
 
 pytest.importorskip("isaaclab_physx")
 
@@ -148,10 +149,10 @@ def test_camera_data_allocates_supported_subset_and_aliases_rgb():
     """CameraData allocates the intersection of requested + supported and aliases rgb into rgba."""
     cfg = _make_camera_cfg(["rgb", "rgba", "depth"])
     specs = {
-        RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8),
-        RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8),
-        RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32),
-        RenderBufferKind.NORMALS: RenderBufferSpec(3, torch.float32),
+        RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8),
+        RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8),
+        RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32),
+        RenderBufferKind.NORMALS: RenderBufferSpec(3, wp.float32),
     }
     data = CameraData.allocate(
         data_types=cfg.data_types, height=8, width=16, num_views=2, device="cpu", supported_specs=specs
@@ -173,8 +174,8 @@ def test_camera_data_drops_requested_types_not_in_supported_specs():
     """Requested types absent from supported_specs are absent from data.output."""
     cfg = _make_camera_cfg(["rgb", "normals"])
     specs = {
-        RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8),
-        RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8),
+        RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8),
+        RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8),
     }
     data = CameraData.allocate(
         data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=specs
@@ -198,8 +199,8 @@ def test_camera_data_no_arg_construction_yields_empty_container():
 def test_camera_data_segmentation_dtype_follows_supported_spec():
     """CameraData consumes the layout dtype declared by the renderer spec."""
     cfg = _make_camera_cfg(["instance_segmentation_fast"])
-    raw_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(1, torch.int32)}
-    colorized_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(4, torch.uint8)}
+    raw_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(1, wp.int32)}
+    colorized_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(4, wp.uint8)}
 
     raw = CameraData.allocate(
         data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=raw_specs
@@ -218,9 +219,9 @@ def test_camera_data_output_proxyarray_zero_copy():
     """Mutations through .torch are visible through .warp on every output buffer."""
     cfg = _make_camera_cfg(["rgb", "rgba", "depth"])
     specs = {
-        RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8),
-        RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8),
-        RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32),
+        RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8),
+        RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8),
+        RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32),
     }
     data = CameraData.allocate(
         data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=specs
@@ -237,7 +238,7 @@ def test_camera_data_output_proxyarray_zero_copy():
 
 def test_camera_data_allocate_raises_on_unknown_name():
     """An unknown data_types name raises ValueError naming the offender."""
-    supported_specs = {RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8)}
+    supported_specs = {RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8)}
     with pytest.raises(ValueError) as exc_info:
         CameraData.allocate(
             data_types=["not_a_real_type"],
diff --git a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py
index d6e314ed2d46..e16537b7676b 100644
--- a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py
+++ b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py
@@ -8,17 +8,17 @@
 from __future__ import annotations
 
 import logging
+import math
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
 
 import newton
-import torch
 import warp as wp
 
 from isaaclab.renderers import BaseRenderer, RenderBufferKind, RenderBufferSpec
 from isaaclab.renderers.camera_render_spec import CameraRenderSpec
+from isaaclab.sensors.camera.orientation_conventions import convert_quat_array
 from isaaclab.sim import SimulationContext
-from isaaclab.utils.math import convert_camera_frame_orientation_convention
 
 from .newton_warp_renderer_cfg import NewtonWarpRendererCfg
 
@@ -83,29 +83,50 @@ def get_output(self, output_name: str) -> wp.array:
         return None
 
     def update(self, positions: wp.array, orientations: wp.array, intrinsics: wp.array):
-        # The convention helper expects a torch tensor; use the zero-copy view of the wp.array.
-        converted_orientations = convert_camera_frame_orientation_convention(
-            wp.to_torch(orientations), origin="world", target="opengl"
+        # Convert world-frame orientations to opengl in-place via a warp kernel
+        # (no torch involvement on the data path).
+        orientations_quatf = (
+            orientations
+            if orientations.dtype is wp.quatf
+            else wp.array(
+                ptr=orientations.ptr,
+                dtype=wp.quatf,
+                shape=(orientations.shape[0],),
+                device=orientations.device,
+                copy=False,
+            )
         )
+        converted_orientations = convert_quat_array(orientations_quatf, origin="world", target="opengl")
 
         self.camera_transforms = wp.empty(
             (1, self.newton_sensor.model.world_count), dtype=wp.transformf, device=self.newton_sensor.model.device
         )
+        positions_vec3 = (
+            positions
+            if positions.dtype is wp.vec3f
+            else wp.array(
+                ptr=positions.ptr,
+                dtype=wp.vec3f,
+                shape=(positions.shape[0],),
+                device=positions.device,
+                copy=False,
+            )
+        )
         wp.launch(
             RenderData._update_transforms,
             self.newton_sensor.model.world_count,
-            [positions, converted_orientations, self.camera_transforms],
+            [positions_vec3, converted_orientations, self.camera_transforms],
             device=self.newton_sensor.model.device,
         )
 
         if self.camera_rays is None:
-            intrinsics_torch = wp.to_torch(intrinsics)
-            first_focal_length = intrinsics_torch[:, 1, 1][0:1]
-            fov_radians_all = 2.0 * torch.atan(self.height / (2.0 * first_focal_length))
-
-            self.camera_rays = self.newton_sensor.utils.compute_pinhole_camera_rays(
-                self.width, self.height, wp.from_torch(fov_radians_all, dtype=wp.float32)
-            )
+            # Pull the focal length scalar via numpy (one element) to compute FOV
+            # without going through torch.
+            intrinsics_np = intrinsics.numpy() if intrinsics.device == "cpu" else intrinsics.numpy()
+            first_focal_length = float(intrinsics_np[0, 1, 1])
+            fov_radians = 2.0 * math.atan(self.height / (2.0 * first_focal_length))
+            fov_arr = wp.array([fov_radians], dtype=wp.float32, device=self.newton_sensor.model.device)
+            self.camera_rays = self.newton_sensor.utils.compute_pinhole_camera_rays(self.width, self.height, fov_arr)
 
     def _typed_view(self, wp_data: wp.array, dtype) -> wp.array:
         """Reinterpret the output buffer as ``(world_count, num_cameras, height, width)`` of ``dtype``.
@@ -177,16 +198,14 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]:
         """Publish the per-output layout this Newton Warp backend writes.
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.supported_output_types`."""
         seg_spec = (
-            RenderBufferSpec(4, torch.uint8)
-            if self.cfg.colorize_instance_segmentation
-            else RenderBufferSpec(1, torch.int32)
+            RenderBufferSpec(4, wp.uint8) if self.cfg.colorize_instance_segmentation else RenderBufferSpec(1, wp.int32)
         )
         return {
-            RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8),
-            RenderBufferKind.ALBEDO: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.NORMALS: RenderBufferSpec(3, torch.float32),
+            RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8),
+            RenderBufferKind.ALBEDO: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.NORMALS: RenderBufferSpec(3, wp.float32),
             RenderBufferKind.INSTANCE_SEGMENTATION_FAST: seg_spec,
         }
 
diff --git a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py
index 624b984c690d..5aa785ece5b3 100644
--- a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py
+++ b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py
@@ -27,7 +27,6 @@
 logger = logging.getLogger(__name__)
 
 import numpy as np
-import torch
 import warp as wp
 
 # The ovrtx C library links to its own version of the USD libraries. Having
@@ -41,7 +40,7 @@
 from packaging.version import Version
 
 from isaaclab.renderers import BaseRenderer, RenderBufferKind, RenderBufferSpec
-from isaaclab.utils.math import convert_camera_frame_orientation_convention
+from isaaclab.sensors.camera.orientation_conventions import convert_quat_array
 
 from .ovrtx_renderer_cfg import OVRTXRendererCfg
 from .ovrtx_renderer_kernels import (
@@ -133,16 +132,16 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]:
         """Publish the per-output layout this OVRTX backend writes.
         See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.supported_output_types`."""
         return {
-            RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8),
-            RenderBufferKind.ALBEDO: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.SIMPLE_SHADING_CONSTANT_DIFFUSE: RenderBufferSpec(3, torch.uint8),
-            RenderBufferKind.SIMPLE_SHADING_DIFFUSE_MDL: RenderBufferSpec(3, torch.uint8),
-            RenderBufferKind.SIMPLE_SHADING_FULL_MDL: RenderBufferSpec(3, torch.uint8),
-            RenderBufferKind.SEMANTIC_SEGMENTATION: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, torch.float32),
+            RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8),
+            RenderBufferKind.ALBEDO: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.SIMPLE_SHADING_CONSTANT_DIFFUSE: RenderBufferSpec(3, wp.uint8),
+            RenderBufferKind.SIMPLE_SHADING_DIFFUSE_MDL: RenderBufferSpec(3, wp.uint8),
+            RenderBufferKind.SIMPLE_SHADING_FULL_MDL: RenderBufferSpec(3, wp.uint8),
+            RenderBufferKind.SEMANTIC_SEGMENTATION: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, wp.float32),
         }
 
     def __init__(self, cfg: OVRTXRendererCfg):
@@ -367,15 +366,8 @@ def create_render_data(self, spec: CameraRenderSpec) -> OVRTXRenderData:
             self.initialize(spec)
         return OVRTXRenderData(spec, DEVICE)
 
-    # Map torch dtypes to their warp counterparts for zero-copy wrapping.
-    _TORCH_TO_WP_DTYPE: dict[torch.dtype, Any] = {
-        torch.uint8: wp.uint8,
-        torch.float32: wp.float32,
-        torch.int32: wp.int32,
-    }
-
     def set_outputs(self, render_data: OVRTXRenderData, output_data: dict[str, wp.array]) -> None:
-        """Wrap caller-owned ``wp.array`` output buffers with the strict per-kind dtype OVRTX expects.
+        """Wrap caller-owned ``wp.array`` output buffers under the strict per-kind dtype OVRTX expects.
 
         Aliased views over a contiguous sibling (e.g. ``rgb`` over ``rgba``) are
         skipped; any other non-contiguous array raises ``ValueError``.
@@ -392,20 +384,8 @@ def set_outputs(self, render_data: OVRTXRenderData, output_data: dict[str, wp.ar
                     f"OVRTXRenderer.set_outputs: output '{name}' is non-contiguous and is not an"
                     " alias of a contiguous output buffer; cannot rewrap with the OVRTX dtype."
                 )
-            torch_dtype = wp.to_torch(arr).dtype
-            wp_dtype = self._TORCH_TO_WP_DTYPE.get(torch_dtype)
-            if wp_dtype is None:
-                raise ValueError(
-                    f"OVRTXRenderer.set_outputs: unsupported dtype {torch_dtype} for output"
-                    f" '{name}'. Add it to OVRTXRenderer._TORCH_TO_WP_DTYPE."
-                )
-            render_data.warp_buffers[name] = wp.array(
-                ptr=arr.ptr,
-                dtype=wp_dtype,
-                shape=tuple(arr.shape),
-                device=arr.device,
-                copy=False,
-            )
+            # Camera spec dtype is already a warp dtype; keep the buffer as-is.
+            render_data.warp_buffers[name] = arr
 
     def update_transforms(self) -> None:
         """Sync physics objects to OVRTX."""
@@ -443,14 +423,24 @@ def update_camera(
     ) -> None:
         """Update camera transforms in OVRTX binding."""
         num_envs = positions.shape[0]
-        # convention helper expects torch; use the zero-copy view of the wp.array
-        camera_quats_opengl = convert_camera_frame_orientation_convention(
-            wp.to_torch(orientations), origin="world", target="opengl"
+        # Convert orientations from world to opengl convention via a warp kernel.
+        orientations_quatf = (
+            orientations
+            if orientations.dtype is wp.quatf
+            else wp.array(
+                ptr=orientations.ptr,
+                dtype=wp.quatf,
+                shape=(num_envs,),
+                device=orientations.device,
+                copy=False,
+            )
         )
-        camera_positions_wp = wp.array(
-            ptr=positions.ptr, dtype=wp.vec3, shape=(positions.shape[0],), device=positions.device, copy=False
+        camera_orientations_wp = convert_quat_array(orientations_quatf, origin="world", target="opengl")
+        camera_positions_wp = (
+            positions
+            if positions.dtype is wp.vec3
+            else wp.array(ptr=positions.ptr, dtype=wp.vec3, shape=(num_envs,), device=positions.device, copy=False)
         )
-        camera_orientations_wp = wp.from_torch(camera_quats_opengl.contiguous(), dtype=wp.quatf)
         camera_transforms = wp.zeros(num_envs, dtype=wp.mat44d, device=DEVICE)
         wp.launch(
             kernel=create_camera_transforms_kernel,
@@ -573,8 +563,14 @@ def _process_render_frame(self, render_data: OVRTXRenderData, frame, output_buff
             with frame.render_vars[depth_var].map(device=Device.CUDA) as mapping:
                 tiled_depth_data = wp.from_dlpack(mapping.tensor)
                 if tiled_depth_data.dtype == wp.uint32:
-                    tiled_depth_data = wp.from_torch(
-                        wp.to_torch(tiled_depth_data).view(torch.float32), dtype=wp.float32
+                    # Reinterpret in place as float32 (same bit width). Pure warp.
+                    tiled_depth_data = wp.array(
+                        ptr=tiled_depth_data.ptr,
+                        dtype=wp.float32,
+                        shape=tiled_depth_data.shape,
+                        strides=tiled_depth_data.strides,
+                        device=tiled_depth_data.device,
+                        copy=False,
                     )
                 self._extract_depth_tiles(render_data, tiled_depth_data, output_buffers)
             break
@@ -590,15 +586,17 @@ def _process_render_frame(self, render_data: OVRTXRenderData, frame, output_buff
 
                 if tiled_semantic_data.dtype == wp.uint32:
                     semantic_colors = self._generate_random_colors_from_ids(tiled_semantic_data)
-
-                    semantic_torch = wp.to_torch(semantic_colors)
-                    semantic_uint8 = semantic_torch.view(torch.uint8)
-
-                    if semantic_torch.dim() == 2:
-                        h, w = semantic_torch.shape
-                        semantic_uint8 = semantic_uint8.reshape(h, w, 4)
-
-                    tiled_semantic_data = wp.from_torch(semantic_uint8, dtype=wp.uint8)
+                    # Reinterpret each uint32 as 4 uint8 bytes by adding a trailing
+                    # channel axis. Pure warp; no torch involvement.
+                    base_shape = tuple(semantic_colors.shape)
+                    new_shape = base_shape + (4,) if len(base_shape) == 2 else base_shape
+                    tiled_semantic_data = wp.array(
+                        ptr=semantic_colors.ptr,
+                        dtype=wp.uint8,
+                        shape=new_shape,
+                        device=semantic_colors.device,
+                        copy=False,
+                    )
 
                 self._extract_rgba_tiles(
                     render_data,
diff --git a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py
index 10f90e53c958..d8ca957ac72c 100644
--- a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py
+++ b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py
@@ -14,7 +14,6 @@
 from typing import TYPE_CHECKING, Any
 
 import numpy as np
-import torch
 import warp as wp
 from packaging import version
 
@@ -24,7 +23,7 @@
 from isaaclab.renderers import BaseRenderer, RenderBufferKind, RenderBufferSpec
 from isaaclab.renderers.camera_render_spec import CameraRenderSpec
 from isaaclab.utils.version import get_isaac_sim_version
-from isaaclab.utils.warp.kernels import reshape_tiled_image
+from isaaclab.utils.warp.kernels import clamp_above_to_inf_kernel, replace_inf_kernel, reshape_tiled_image
 
 from .isaac_rtx_renderer_utils import ensure_isaac_rtx_render_update, ensure_rtx_hydra_engine_attached
 
@@ -107,19 +106,19 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]:
         specs: dict[RenderBufferKind, RenderBufferSpec] = {
             # Replicator's native layout for color output is rgba/uint8;
             # ``Camera`` aliases ``rgb`` as a view into ``rgba`` storage.
-            RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8),
-            RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8),
-            RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, torch.float32),
-            RenderBufferKind.NORMALS: RenderBufferSpec(3, torch.float32),
-            RenderBufferKind.MOTION_VECTORS: RenderBufferSpec(2, torch.float32),
+            RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8),
+            RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8),
+            RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, wp.float32),
+            RenderBufferKind.NORMALS: RenderBufferSpec(3, wp.float32),
+            RenderBufferKind.MOTION_VECTORS: RenderBufferSpec(2, wp.float32),
         }
 
         if sim_major >= 6:
-            specs[RenderBufferKind.ALBEDO] = RenderBufferSpec(4, torch.uint8)
+            specs[RenderBufferKind.ALBEDO] = RenderBufferSpec(4, wp.uint8)
             for shading_type in SIMPLE_SHADING_MODES:
-                specs[RenderBufferKind(shading_type)] = RenderBufferSpec(3, torch.uint8)
+                specs[RenderBufferKind(shading_type)] = RenderBufferSpec(3, wp.uint8)
 
         seg_specs = (
             (RenderBufferKind.SEMANTIC_SEGMENTATION, self.cfg.colorize_semantic_segmentation),
@@ -127,7 +126,7 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]:
             (RenderBufferKind.INSTANCE_ID_SEGMENTATION_FAST, self.cfg.colorize_instance_id_segmentation),
         )
         for name, colorize in seg_specs:
-            specs[name] = RenderBufferSpec(4, torch.uint8) if colorize else RenderBufferSpec(1, torch.int32)
+            specs[name] = RenderBufferSpec(4, wp.uint8) if colorize else RenderBufferSpec(1, wp.int32)
 
         return specs
 
@@ -332,13 +331,18 @@ def tiling_grid_shape():
             else:
                 tiled_data_buffer = output
 
-            # convert data buffer to warp array
+            # convert data buffer to warp array. Replicator may return either a
+            # numpy array (CPU staging) or a torch tensor (GPU device-resident);
+            # both are wrapped via warp's interop without going through torch ops.
             if isinstance(tiled_data_buffer, np.ndarray):
-                # Let warp infer the dtype from numpy array instead of hardcoding uint8
+                # Let warp infer the dtype from numpy array instead of hardcoding uint8.
                 # Different annotators return different dtypes: RGB(uint8), depth(float32), segmentation(uint32)
                 tiled_data_buffer = wp.array(tiled_data_buffer, device=device)
+            elif isinstance(tiled_data_buffer, wp.array):
+                pass
             else:
-                tiled_data_buffer = tiled_data_buffer.to(device=device)
+                # torch.Tensor path: wrap as a zero-copy wp.array (no torch op).
+                tiled_data_buffer = wp.from_torch(tiled_data_buffer.contiguous())
 
             # process data for different segmentation types
             # Note: Replicator returns raw buffers of dtype uint32 for segmentation types
@@ -385,18 +389,24 @@ def tiling_grid_shape():
             #       in values larger than the clipping range in the output. We apply an additional clipping to
             #       ensure values are within the clipping range for all the annotators.
             if data_type == "distance_to_camera":
-                # Use the zero-copy torch view of the wp.array primary for the in-place clip.
-                view = wp.to_torch(output_wp)
-                view[view > cfg.spawn.clipping_range[1]] = torch.inf
+                wp.launch(
+                    kernel=clamp_above_to_inf_kernel,
+                    dim=output_wp.shape,
+                    inputs=[float(cfg.spawn.clipping_range[1]), output_wp],
+                    device=device,
+                )
 
             # apply defined clipping behavior
             if (
                 data_type in ("distance_to_camera", "distance_to_image_plane", "depth")
                 and self.cfg.depth_clipping_behavior != "none"
             ):
-                view = wp.to_torch(output_wp)
-                view[torch.isinf(view)] = (
-                    0.0 if self.cfg.depth_clipping_behavior == "zero" else cfg.spawn.clipping_range[1]
+                replacement = 0.0 if self.cfg.depth_clipping_behavior == "zero" else cfg.spawn.clipping_range[1]
+                wp.launch(
+                    kernel=replace_inf_kernel,
+                    dim=output_wp.shape,
+                    inputs=[float(replacement), output_wp],
+                    device=device,
                 )
 
     def read_output(self, render_data: IsaacRtxRenderData, camera_data: CameraData) -> None:

From 7d0b4d8977d2c933e7106b821c7394bfd30faa93 Mon Sep 17 00:00:00 2001
From: Horde <horde@nvidia.com>
Date: Fri, 8 May 2026 00:42:56 +0000
Subject: [PATCH 3/3] Updates

---
 .../isaaclab/sensors/camera/camera.py         | 43 ++++++++++++-------
 .../sensors/camera/orientation_conventions.py | 35 +++++++++------
 .../changelog.d/camera-data-proxyarray.rst    |  7 +++
 .../changelog.d/camera-data-proxyarray.rst    | 10 +++++
 4 files changed, 66 insertions(+), 29 deletions(-)
 create mode 100644 source/isaaclab_contrib/changelog.d/camera-data-proxyarray.rst
 create mode 100644 source/isaaclab_tasks/changelog.d/camera-data-proxyarray.rst

diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py
index b63f44a044ce..7a8ecb368810 100644
--- a/source/isaaclab/isaaclab/sensors/camera/camera.py
+++ b/source/isaaclab/isaaclab/sensors/camera/camera.py
@@ -576,11 +576,15 @@ def _create_buffers(self):
         )
         # Camera-frame state (pose / intrinsics) is owned by the camera, not
         # the renderer: populate it on the freshly constructed CameraData.
+        # ``pos_w`` / ``quat_w_world`` are stored as plain float32 arrays with
+        # explicit ``(N, 3)`` / ``(N, 4)`` layout so ``ProxyArray.shape``
+        # matches the documented shape; warp kernels reinterpret them as
+        # ``wp.vec3`` / ``wp.quatf`` views (zero-copy) when needed.
         n = self._view.count
         self._data.intrinsic_matrices = ProxyArray(wp.zeros((n, 3, 3), dtype=wp.float32, device=device_str))
         self._update_intrinsic_matrices(list(range(n)))
-        self._data.pos_w = ProxyArray(wp.zeros(n, dtype=wp.vec3, device=device_str))
-        self._data.quat_w_world = ProxyArray(wp.zeros(n, dtype=wp.quatf, device=device_str))
+        self._data.pos_w = ProxyArray(wp.zeros((n, 3), dtype=wp.float32, device=device_str))
+        self._data.quat_w_world = ProxyArray(wp.zeros((n, 4), dtype=wp.float32, device=device_str))
         self._update_poses_wp(self._ALL_INDICES)
         # Hand the renderer the underlying wp.array buffers; the ProxyArray
         # wrappers in CameraData stay valid since both views share memory.
@@ -639,29 +643,36 @@ def _update_poses_wp(self, env_ids_wp: wp.array):
         # Convert quat_w (opengl) -> world via a warp kernel and scatter into CameraData.
         n_pose = int(env_ids_wp.shape[0])
         quat_w_world_local = convert_quat_array(quat_w_proxy.warp, origin="opengl", target="world")
-        # Build a temporary wp.bool mask covering env_ids_wp so we can scatter through
-        # the existing masked_set kernels.
-        mask_local = wp.zeros(self._view.count, dtype=wp.bool, device=self._device)
-        wp.launch(
-            camera_kernels.indices_to_mask_kernel,
-            dim=n_pose,
-            inputs=[env_ids_wp, mask_local],
-            device=self._device,
+        n_total = self._view.count
+        # Reinterpret the (N, 3) / (N, 4) float32 storage as wp.vec3 / wp.quatf
+        # views so the existing scatter kernels (which take typed inputs) can be
+        # reused without copying.
+        pos_dst = self._data.pos_w.warp
+        quat_dst = self._data.quat_w_world.warp
+        pos_dst_vec3 = wp.array(
+            ptr=pos_dst.ptr,
+            dtype=wp.vec3,
+            shape=(n_total,),
+            device=pos_dst.device,
+            copy=False,
+        )
+        quat_dst_quatf = wp.array(
+            ptr=quat_dst.ptr,
+            dtype=wp.quatf,
+            shape=(n_total,),
+            device=quat_dst.device,
+            copy=False,
         )
-        # Scatter pos and quat into self._data via masked copies.
-        # Note: the view's pos/quat arrays are already the same length as env_ids_wp,
-        # so we copy them into a full-sized scratch first via index, then mask-copy.
-        # For correctness, we instead scatter directly from the (M,) arrays using an indexed kernel.
         wp.launch(
             camera_kernels.scatter_vec3f_kernel,
             dim=n_pose,
-            inputs=[env_ids_wp, pos_w_proxy.warp, self._data.pos_w.warp],
+            inputs=[env_ids_wp, pos_w_proxy.warp, pos_dst_vec3],
             device=self._device,
         )
         wp.launch(
             camera_kernels.scatter_quatf_kernel,
             dim=n_pose,
-            inputs=[env_ids_wp, quat_w_world_local, self._data.quat_w_world.warp],
+            inputs=[env_ids_wp, quat_w_world_local, quat_dst_quatf],
             device=self._device,
         )
         # notify renderer of updated poses (guarded in case called before initialization completes)
diff --git a/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py b/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py
index b9ed24778e98..909495a70c34 100644
--- a/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py
+++ b/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py
@@ -13,6 +13,7 @@
 
 from __future__ import annotations
 
+import functools
 import math
 from typing import Literal
 
@@ -82,17 +83,25 @@ def _matrix_to_quat_xyzw(rotm: np.ndarray) -> tuple[float, float, float, float]:
 # 180 deg rotation around X axis: q = (1, 0, 0, 0) in (x, y, z, w). 180-deg rotations are
 # self-inverse, so the same quaternion serves both directions for ros<->opengl.
 _Q_FLIP_YZ = (1.0, 0.0, 0.0, 0.0)
-# The torch reference (``isaaclab.utils.math.convert_camera_frame_orientation_convention``)
-# post-multiplies by matrix_from_euler([pi/2, -pi/2, 0], "XYZ") for world->opengl and by
-# the *transpose* of the same matrix for opengl->world. As a quaternion, the transpose of
-# a unit rotation matrix is the inverse — i.e. the conjugate of the corresponding quat.
-_Q_WORLD_TO_OPENGL = _matrix_to_quat_xyzw(_euler_xyz_intrinsic_to_matrix(math.pi / 2, -math.pi / 2, 0.0))
-_Q_OPENGL_TO_WORLD = (
-    -_Q_WORLD_TO_OPENGL[0],
-    -_Q_WORLD_TO_OPENGL[1],
-    -_Q_WORLD_TO_OPENGL[2],
-    _Q_WORLD_TO_OPENGL[3],
-)
+
+
+@functools.cache
+def _q_world_to_opengl() -> tuple[float, float, float, float]:
+    """The torch reference (``isaaclab.utils.math.convert_camera_frame_orientation_convention``)
+    post-multiplies by ``matrix_from_euler([pi/2, -pi/2, 0], "XYZ")`` for world->opengl.
+
+    Computed lazily so importing this module under tools that mock ``numpy``
+    (e.g. Sphinx ``autodoc_mock_imports``) does not fail at module load.
+    """
+    return _matrix_to_quat_xyzw(_euler_xyz_intrinsic_to_matrix(math.pi / 2, -math.pi / 2, 0.0))
+
+
+@functools.cache
+def _q_opengl_to_world() -> tuple[float, float, float, float]:
+    """Inverse of :func:`_q_world_to_opengl` — i.e. the conjugate, since the
+    underlying rotation is unit-norm. Lazy for the same reason."""
+    qx, qy, qz, qw = _q_world_to_opengl()
+    return -qx, -qy, -qz, qw
 
 
 def _identity() -> tuple[float, float, float, float]:
@@ -125,7 +134,7 @@ def _resolve_conversion_quat(
     if origin == "ros":
         to_opengl = _Q_FLIP_YZ
     elif origin == "world":
-        to_opengl = _Q_WORLD_TO_OPENGL
+        to_opengl = _q_world_to_opengl()
     else:  # origin == "opengl"
         to_opengl = _identity()
 
@@ -133,7 +142,7 @@ def _resolve_conversion_quat(
     if target == "ros":
         from_opengl = _Q_FLIP_YZ
     elif target == "world":
-        from_opengl = _Q_OPENGL_TO_WORLD
+        from_opengl = _q_opengl_to_world()
     else:  # target == "opengl"
         from_opengl = _identity()
 
diff --git a/source/isaaclab_contrib/changelog.d/camera-data-proxyarray.rst b/source/isaaclab_contrib/changelog.d/camera-data-proxyarray.rst
new file mode 100644
index 000000000000..8c30136624ef
--- /dev/null
+++ b/source/isaaclab_contrib/changelog.d/camera-data-proxyarray.rst
@@ -0,0 +1,7 @@
+Changed
+^^^^^^^
+
+* :class:`~isaaclab_contrib.sensors.tacsl_sensor.VisuotactileSensor` now accesses
+  :attr:`~isaaclab.sensors.camera.CameraData.output` entries through their
+  :attr:`~isaaclab.utils.warp.proxy_array.ProxyArray.torch` view to match the
+  Warp-first :class:`CameraData` storage.
diff --git a/source/isaaclab_tasks/changelog.d/camera-data-proxyarray.rst b/source/isaaclab_tasks/changelog.d/camera-data-proxyarray.rst
new file mode 100644
index 000000000000..74b5238d20ae
--- /dev/null
+++ b/source/isaaclab_tasks/changelog.d/camera-data-proxyarray.rst
@@ -0,0 +1,10 @@
+Changed
+^^^^^^^
+
+* Updated the cartpole / shadow-hand vision RL envs and the dexsuite
+  ``vision_camera`` observation term to read camera outputs through
+  :attr:`~isaaclab.utils.warp.proxy_array.ProxyArray.torch`, matching the
+  new Warp-first :class:`~isaaclab.sensors.camera.CameraData` storage.
+  Files touched: ``direct/cartpole/cartpole_camera_env.py``,
+  ``direct/shadow_hand/shadow_hand_vision_env.py``,
+  ``manager_based/manipulation/dexsuite/mdp/observations.py``.