isaac-sim · daniela-hase · May 7, 2026 · May 7, 2026 · May 8, 2026
@@ -239,8 +239,11 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene):
         # save every 10th image (for visualization purposes only)
         # note: saving images will slow down the simulation
         if count % 10 == 0:
-            # compare generated RGB images across different cameras
-            rgb_images = [scene["camera"].data.output["rgb"][0, ..., :3], scene["tiled_camera"].data.output["rgb"][0]]
+            # compare generated RGB images across different cameras (use ProxyArray.torch)
+            rgb_images = [
+                scene["camera"].data.output["rgb"].torch[0, ..., :3],
+                scene["tiled_camera"].data.output["rgb"].torch[0],
+            ]
             save_images_grid(
                 rgb_images,
                 subtitles=["Camera"],
@@ -250,9 +253,9 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene):
 
             # compare generated Depth images across different cameras
             depth_images = [
-                scene["camera"].data.output["distance_to_image_plane"][0],
-                scene["tiled_camera"].data.output["distance_to_image_plane"][0, ..., 0],
-                scene["raycast_camera"].data.output["distance_to_image_plane"][0],
+                scene["camera"].data.output["distance_to_image_plane"].torch[0],
+                scene["tiled_camera"].data.output["distance_to_image_plane"].torch[0, ..., 0],
+                scene["raycast_camera"].data.output["distance_to_image_plane"].torch[0],
             ]
             save_images_grid(
                 depth_images,
@@ -263,7 +266,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene):
             )
 
             # save all tiled RGB images
-            tiled_images = scene["tiled_camera"].data.output["rgb"]
+            tiled_images = scene["tiled_camera"].data.output["rgb"].torch
             save_images_grid(
                 tiled_images,
                 subtitles=[f"Cam{i}" for i in range(tiled_images.shape[0])],
@@ -272,7 +275,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene):
             )
 
             # save all camera RGB images
-            cam_images = scene["camera"].data.output["rgb"][..., :3]
+            cam_images = scene["camera"].data.output["rgb"].torch[..., :3]
             save_images_grid(
                 cam_images,
                 subtitles=[f"Cam{i}" for i in range(cam_images.shape[0])],

@@ -232,7 +232,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict):
             # Save images from camera at camera_index
             # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy.
             single_cam_data = convert_dict_to_backend(
-                {k: v[camera_index] for k, v in camera.data.output.items()}, backend="numpy"
+                {k: v.torch[camera_index] for k, v in camera.data.output.items()}, backend="numpy"
             )
 
             # Pack data back into replicator format to save them using its writer
@@ -245,7 +245,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict):
                     rep_output["annotators"][key] = {"render_product": {"data": data}}
             # Save images
             # Note: We need to provide On-time data for Replicator to save the images.
-            rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]}
+            rep_output["trigger_outputs"] = {"on_time": camera.frame.torch[camera_index]}
             rep_writer.write(rep_output)
 
         # Draw pointcloud if there is a GUI and --draw has been passed
@@ -256,10 +256,10 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict):
         ):
             # Derive pointcloud from camera at camera_index
             pointcloud = create_pointcloud_from_depth(
-                intrinsic_matrix=camera.data.intrinsic_matrices[camera_index],
-                depth=camera.data.output["distance_to_image_plane"][camera_index],
-                position=camera.data.pos_w[camera_index],
-                orientation=camera.data.quat_w_ros[camera_index],
+                intrinsic_matrix=camera.data.intrinsic_matrices.torch[camera_index],
+                depth=camera.data.output["distance_to_image_plane"].torch[camera_index],
+                position=camera.data.pos_w.torch[camera_index],
+                orientation=camera.data.quat_w_ros.torch[camera_index],
                 device=sim.device,
             )
 

diff --git a/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst b/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst
@@ -0,0 +1,51 @@
+Changed
+^^^^^^^
+
+* **Breaking:** :class:`~isaaclab.sensors.camera.CameraData` now stores its array fields
+  as :class:`~isaaclab.utils.warp.proxy_array.ProxyArray` wrappers around
+  :class:`warp.array` primary buffers. The change covers
+  :attr:`~isaaclab.sensors.camera.CameraData.pos_w`,
+  :attr:`~isaaclab.sensors.camera.CameraData.quat_w_world`,
+  :attr:`~isaaclab.sensors.camera.CameraData.intrinsic_matrices`,
+  every entry in :attr:`~isaaclab.sensors.camera.CameraData.output`, and the
+  derived :attr:`~isaaclab.sensors.camera.CameraData.quat_w_ros` /
+  :attr:`~isaaclab.sensors.camera.CameraData.quat_w_opengl` properties.
+  Migration: read torch tensors via the ``.torch`` accessor (zero-copy) and pass
+  ``.warp`` to warp kernels (e.g. ``camera.data.output["rgb"].torch``,
+  ``camera.data.pos_w.warp``). Existing code that uses the field directly as a
+  tensor still works through :class:`ProxyArray`'s deprecation bridge but emits
+  a one-time :class:`DeprecationWarning`.
+* **Breaking:** :class:`~isaaclab.sensors.ray_caster.MultiMeshRayCasterCameraData`
+  ``image_mesh_ids`` is likewise a :class:`ProxyArray` over a ``wp.int16`` warp
+  buffer; access via ``.torch`` for tensor reads.
+* **Breaking:** :attr:`~isaaclab.sensors.camera.Camera.frame` and
+  :attr:`~isaaclab.sensors.ray_caster.RayCasterCamera.frame` now return a
+  :class:`ProxyArray` over a ``wp.int64`` array instead of a ``torch.Tensor``.
+  Migration: use ``camera.frame.torch`` for the existing tensor view.
+* **Breaking:**
+  :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs` and
+  :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_camera` now type
+  their array parameters as :class:`warp.array` rather than
+  :class:`torch.Tensor`. Renderer subclasses receive the underlying
+  :attr:`ProxyArray.warp` buffers directly from the camera; consumers that
+  implemented :class:`BaseRenderer` must update their signatures.
+* :meth:`~isaaclab.sensors.camera.Camera.set_intrinsic_matrices`,
+  :meth:`~isaaclab.sensors.camera.Camera.set_world_poses`, and
+  :meth:`~isaaclab.sensors.camera.Camera.set_world_poses_from_view` (plus their
+  :class:`~isaaclab.sensors.ray_caster.RayCasterCamera` analogues) now accept
+  ``wp.array | ProxyArray | torch.Tensor | numpy.ndarray`` inputs, with
+  :class:`warp.array` documented as the canonical type. Existing
+  ``torch.Tensor`` callers continue to work without changes.
+* **Breaking:** :attr:`~isaaclab.renderers.RenderBufferSpec.dtype` now stores a
+  Warp scalar dtype (e.g. ``warp.uint8``) rather than a ``torch.dtype``.
+  Renderer authors that build :class:`RenderBufferSpec` instances must update
+  ``RenderBufferSpec(channels, torch.uint8)`` to ``RenderBufferSpec(channels,
+  wp.uint8)`` (and similarly for ``torch.float32``, ``torch.int32``).
+* :class:`~isaaclab.sensors.camera.Camera`,
+  :class:`~isaaclab.sensors.camera.CameraData`, and the three first-party
+  renderers (:class:`IsaacRtxRenderer`, :class:`NewtonWarpRenderer`,
+  :class:`OVRTXRenderer`) no longer import or use ``torch`` internally: every
+  storage allocation goes through :func:`warp.zeros`, scatter / mask / clipping
+  operations are warp kernels (see
+  :mod:`~isaaclab.sensors.camera.kernels`), and quaternion convention
+  conversions are implemented by :func:`isaaclab.sensors.camera.orientation_conventions.convert_quat_array`.
@@ -407,12 +407,12 @@ def image(
     # extract the used quantities (to enable type-hinting)
     sensor: Camera | RayCasterCamera = env.scene.sensors[sensor_cfg.name]
 
-    # obtain the input image
-    images = sensor.data.output[data_type]
+    # obtain the input image (CameraData fields are ProxyArray; .torch is a zero-copy view)
+    images = sensor.data.output[data_type].torch
 
     # depth image conversion
     if (data_type == "distance_to_camera") and convert_perspective_to_orthogonal:
-        images = math_utils.orthogonalize_perspective_depth(images, sensor.data.intrinsic_matrices)
+        images = math_utils.orthogonalize_perspective_depth(images, sensor.data.intrinsic_matrices.torch)
 
     # rgb/depth/normals image normalization
     if normalize:

diff --git a/source/isaaclab/isaaclab/renderers/base_renderer.py b/source/isaaclab/isaaclab/renderers/base_renderer.py
@@ -10,12 +10,12 @@
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING, Any
 
+import warp as wp
+
 from .camera_render_spec import CameraRenderSpec
 from .output_contract import RenderBufferKind, RenderBufferSpec
 
 if TYPE_CHECKING:
-    import torch
-
     from isaaclab.sensors.camera.camera_data import CameraData
 
 
@@ -60,13 +60,16 @@ def create_render_data(self, spec: CameraRenderSpec) -> Any:
         pass
 
     @abstractmethod
-    def set_outputs(self, render_data: Any, output_data: dict[str, torch.Tensor]) -> None:
+    def set_outputs(self, render_data: Any, output_data: dict[str, wp.array]) -> None:
         """Store reference to output buffers for writing during render.
 
         Args:
             render_data: The render data object from :meth:`create_render_data`.
             output_data: Dictionary mapping output names (e.g. ``"rgb"``, ``"depth"``)
-                to pre-allocated tensors where rendered data will be written.
+                to pre-allocated :class:`warp.array` buffers where rendered data
+                will be written. Each buffer has shape ``(num_views, height, width,
+                channels)`` and the per-kind dtype declared by
+                :meth:`supported_output_types`.
         """
         pass
 
@@ -80,14 +83,14 @@ def update_transforms(self) -> None:
 
     @abstractmethod
     def update_camera(
-        self, render_data: Any, positions: torch.Tensor, orientations: torch.Tensor, intrinsics: torch.Tensor
+        self, render_data: Any, positions: wp.array, orientations: wp.array, intrinsics: wp.array
     ) -> None:
         """Update camera poses and intrinsics for the next render.
 
         Args:
             render_data: The render data object from :meth:`create_render_data`.
-            positions: Camera positions in world frame, shape ``(N, 3)``.
-            orientations: Camera orientations as quaternions (x, y, z, w), shape ``(N, 4)``.
+            positions: Camera positions [m] in world frame, shape ``(N, 3)``.
+            orientations: Camera orientations as quaternions ``(x, y, z, w)``, shape ``(N, 4)``.
             intrinsics: Camera intrinsic matrices, shape ``(N, 3, 3)``.
         """
         pass

diff --git a/source/isaaclab/isaaclab/renderers/output_contract.py b/source/isaaclab/isaaclab/renderers/output_contract.py
@@ -14,10 +14,7 @@
 
 from dataclasses import dataclass
 from enum import StrEnum
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    import torch
+from typing import Any
 
 
 class RenderBufferKind(StrEnum):
@@ -48,7 +45,8 @@ class RenderBufferSpec:
     """Per-pixel layout (channels + dtype) for one render buffer kind."""
 
     channels: int
-    """Number of per-pixel channels (last dimension of the allocated tensor)."""
+    """Number of per-pixel channels (last dimension of the allocated buffer)."""
 
-    dtype: torch.dtype
-    """Torch dtype the renderer writes for this render buffer kind."""
+    dtype: Any
+    """Warp scalar dtype (e.g. ``warp.uint8``, ``warp.float32``, ``warp.int32``)
+    the renderer writes for this render buffer kind."""