From 067a955c1a1cdec243f7e4667ad591712a82936f Mon Sep 17 00:00:00 2001 From: Horde Date: Thu, 7 May 2026 20:24:34 +0000 Subject: [PATCH 1/3] Updated Camera, CameraData and Renderer to use Warp instead of Torch --- scripts/demos/sensors/cameras.py | 17 +- .../tutorials/04_sensors/run_usd_camera.py | 12 +- .../camera-data-proxyarray.minor.rst | 38 +++++ .../isaaclab/envs/mdp/observations.py | 6 +- .../isaaclab/renderers/base_renderer.py | 17 +- .../isaaclab/sensors/camera/camera.py | 139 +++++++++++----- .../isaaclab/sensors/camera/camera_data.py | 57 +++++-- .../multi_mesh_ray_caster_camera.py | 41 +++-- .../multi_mesh_ray_caster_camera_data.py | 9 +- .../sensors/ray_caster/ray_caster_camera.py | 151 ++++++++++++------ .../renderers/test_camera_output_contract.py | 41 +++-- .../tacsl_sensor/visuotactile_sensor.py | 8 +- .../changelog.d/camera-data-proxyarray.skip | 2 + .../renderers/newton_warp_renderer.py | 58 ++++--- .../changelog.d/camera-data-proxyarray.skip | 2 + .../isaaclab_ov/renderers/ovrtx_renderer.py | 42 ++--- .../camera-data-proxyarray.minor.rst | 13 ++ .../renderers/isaac_rtx_renderer.py | 27 ++-- .../direct/cartpole/cartpole_camera_env.py | 14 +- .../shadow_hand/shadow_hand_vision_env.py | 5 +- .../manipulation/dexsuite/mdp/observations.py | 3 +- 21 files changed, 475 insertions(+), 227 deletions(-) create mode 100644 source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst create mode 100644 source/isaaclab_newton/changelog.d/camera-data-proxyarray.skip create mode 100644 source/isaaclab_ov/changelog.d/camera-data-proxyarray.skip create mode 100644 source/isaaclab_physx/changelog.d/camera-data-proxyarray.minor.rst diff --git a/scripts/demos/sensors/cameras.py b/scripts/demos/sensors/cameras.py index 400881ac5d62..970bd7841364 100644 --- a/scripts/demos/sensors/cameras.py +++ b/scripts/demos/sensors/cameras.py @@ -239,8 +239,11 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene): # save every 10th image (for visualization purposes only) # note: saving images will slow down the simulation if count % 10 == 0: - # compare generated RGB images across different cameras - rgb_images = [scene["camera"].data.output["rgb"][0, ..., :3], scene["tiled_camera"].data.output["rgb"][0]] + # compare generated RGB images across different cameras (use ProxyArray.torch) + rgb_images = [ + scene["camera"].data.output["rgb"].torch[0, ..., :3], + scene["tiled_camera"].data.output["rgb"].torch[0], + ] save_images_grid( rgb_images, subtitles=["Camera"], @@ -250,9 +253,9 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene): # compare generated Depth images across different cameras depth_images = [ - scene["camera"].data.output["distance_to_image_plane"][0], - scene["tiled_camera"].data.output["distance_to_image_plane"][0, ..., 0], - scene["raycast_camera"].data.output["distance_to_image_plane"][0], + scene["camera"].data.output["distance_to_image_plane"].torch[0], + scene["tiled_camera"].data.output["distance_to_image_plane"].torch[0, ..., 0], + scene["raycast_camera"].data.output["distance_to_image_plane"].torch[0], ] save_images_grid( depth_images, @@ -263,7 +266,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene): ) # save all tiled RGB images - tiled_images = scene["tiled_camera"].data.output["rgb"] + tiled_images = scene["tiled_camera"].data.output["rgb"].torch save_images_grid( tiled_images, subtitles=[f"Cam{i}" for i in range(tiled_images.shape[0])], @@ -272,7 +275,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene: InteractiveScene): ) # save all camera RGB images - cam_images = scene["camera"].data.output["rgb"][..., :3] + cam_images = scene["camera"].data.output["rgb"].torch[..., :3] save_images_grid( cam_images, subtitles=[f"Cam{i}" for i in range(cam_images.shape[0])], diff --git a/scripts/tutorials/04_sensors/run_usd_camera.py b/scripts/tutorials/04_sensors/run_usd_camera.py index 5c041d737c5d..2a9626810238 100644 --- a/scripts/tutorials/04_sensors/run_usd_camera.py +++ b/scripts/tutorials/04_sensors/run_usd_camera.py @@ -232,7 +232,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict): # Save images from camera at camera_index # note: BasicWriter only supports saving data in numpy format, so we need to convert the data to numpy. single_cam_data = convert_dict_to_backend( - {k: v[camera_index] for k, v in camera.data.output.items()}, backend="numpy" + {k: v.torch[camera_index] for k, v in camera.data.output.items()}, backend="numpy" ) # Pack data back into replicator format to save them using its writer @@ -245,7 +245,7 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict): rep_output["annotators"][key] = {"render_product": {"data": data}} # Save images # Note: We need to provide On-time data for Replicator to save the images. - rep_output["trigger_outputs"] = {"on_time": camera.frame[camera_index]} + rep_output["trigger_outputs"] = {"on_time": camera.frame.torch[camera_index]} rep_writer.write(rep_output) # Draw pointcloud if there is a GUI and --draw has been passed @@ -256,10 +256,10 @@ def run_simulator(sim: sim_utils.SimulationContext, scene_entities: dict): ): # Derive pointcloud from camera at camera_index pointcloud = create_pointcloud_from_depth( - intrinsic_matrix=camera.data.intrinsic_matrices[camera_index], - depth=camera.data.output["distance_to_image_plane"][camera_index], - position=camera.data.pos_w[camera_index], - orientation=camera.data.quat_w_ros[camera_index], + intrinsic_matrix=camera.data.intrinsic_matrices.torch[camera_index], + depth=camera.data.output["distance_to_image_plane"].torch[camera_index], + position=camera.data.pos_w.torch[camera_index], + orientation=camera.data.quat_w_ros.torch[camera_index], device=sim.device, ) diff --git a/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst b/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst new file mode 100644 index 000000000000..080dff3cf95a --- /dev/null +++ b/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst @@ -0,0 +1,38 @@ +Changed +^^^^^^^ + +* **Breaking:** :class:`~isaaclab.sensors.camera.CameraData` now stores its array fields + as :class:`~isaaclab.utils.warp.proxy_array.ProxyArray` wrappers around + :class:`warp.array` primary buffers. The change covers + :attr:`~isaaclab.sensors.camera.CameraData.pos_w`, + :attr:`~isaaclab.sensors.camera.CameraData.quat_w_world`, + :attr:`~isaaclab.sensors.camera.CameraData.intrinsic_matrices`, + every entry in :attr:`~isaaclab.sensors.camera.CameraData.output`, and the + derived :attr:`~isaaclab.sensors.camera.CameraData.quat_w_ros` / + :attr:`~isaaclab.sensors.camera.CameraData.quat_w_opengl` properties. + Migration: read torch tensors via the ``.torch`` accessor (zero-copy) and pass + ``.warp`` to warp kernels (e.g. ``camera.data.output["rgb"].torch``, + ``camera.data.pos_w.warp``). Existing code that uses the field directly as a + tensor still works through :class:`ProxyArray`'s deprecation bridge but emits + a one-time :class:`DeprecationWarning`. +* **Breaking:** :class:`~isaaclab.sensors.ray_caster.MultiMeshRayCasterCameraData` + ``image_mesh_ids`` is likewise a :class:`ProxyArray` over a ``wp.int16`` warp + buffer; access via ``.torch`` for tensor reads. +* **Breaking:** :attr:`~isaaclab.sensors.camera.Camera.frame` and + :attr:`~isaaclab.sensors.ray_caster.RayCasterCamera.frame` now return a + :class:`ProxyArray` over a ``wp.int64`` array instead of a ``torch.Tensor``. + Migration: use ``camera.frame.torch`` for the existing tensor view. +* **Breaking:** + :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs` and + :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_camera` now type + their array parameters as :class:`warp.array` rather than + :class:`torch.Tensor`. Renderer subclasses receive the underlying + :attr:`ProxyArray.warp` buffers directly from the camera; consumers that + implemented :class:`BaseRenderer` must update their signatures. +* :meth:`~isaaclab.sensors.camera.Camera.set_intrinsic_matrices`, + :meth:`~isaaclab.sensors.camera.Camera.set_world_poses`, and + :meth:`~isaaclab.sensors.camera.Camera.set_world_poses_from_view` (plus their + :class:`~isaaclab.sensors.ray_caster.RayCasterCamera` analogues) now accept + ``wp.array | ProxyArray | torch.Tensor | numpy.ndarray`` inputs, with + :class:`warp.array` documented as the canonical type. Existing + ``torch.Tensor`` callers continue to work without changes. diff --git a/source/isaaclab/isaaclab/envs/mdp/observations.py b/source/isaaclab/isaaclab/envs/mdp/observations.py index 8f97da3595dd..adf09116cde2 100644 --- a/source/isaaclab/isaaclab/envs/mdp/observations.py +++ b/source/isaaclab/isaaclab/envs/mdp/observations.py @@ -407,12 +407,12 @@ def image( # extract the used quantities (to enable type-hinting) sensor: Camera | RayCasterCamera = env.scene.sensors[sensor_cfg.name] - # obtain the input image - images = sensor.data.output[data_type] + # obtain the input image (CameraData fields are ProxyArray; .torch is a zero-copy view) + images = sensor.data.output[data_type].torch # depth image conversion if (data_type == "distance_to_camera") and convert_perspective_to_orthogonal: - images = math_utils.orthogonalize_perspective_depth(images, sensor.data.intrinsic_matrices) + images = math_utils.orthogonalize_perspective_depth(images, sensor.data.intrinsic_matrices.torch) # rgb/depth/normals image normalization if normalize: diff --git a/source/isaaclab/isaaclab/renderers/base_renderer.py b/source/isaaclab/isaaclab/renderers/base_renderer.py index 2fc498eae8e3..604dc6ea8794 100644 --- a/source/isaaclab/isaaclab/renderers/base_renderer.py +++ b/source/isaaclab/isaaclab/renderers/base_renderer.py @@ -10,12 +10,12 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any +import warp as wp + from .camera_render_spec import CameraRenderSpec from .output_contract import RenderBufferKind, RenderBufferSpec if TYPE_CHECKING: - import torch - from isaaclab.sensors.camera.camera_data import CameraData @@ -60,13 +60,16 @@ def create_render_data(self, spec: CameraRenderSpec) -> Any: pass @abstractmethod - def set_outputs(self, render_data: Any, output_data: dict[str, torch.Tensor]) -> None: + def set_outputs(self, render_data: Any, output_data: dict[str, wp.array]) -> None: """Store reference to output buffers for writing during render. Args: render_data: The render data object from :meth:`create_render_data`. output_data: Dictionary mapping output names (e.g. ``"rgb"``, ``"depth"``) - to pre-allocated tensors where rendered data will be written. + to pre-allocated :class:`warp.array` buffers where rendered data + will be written. Each buffer has shape ``(num_views, height, width, + channels)`` and the per-kind dtype declared by + :meth:`supported_output_types`. """ pass @@ -80,14 +83,14 @@ def update_transforms(self) -> None: @abstractmethod def update_camera( - self, render_data: Any, positions: torch.Tensor, orientations: torch.Tensor, intrinsics: torch.Tensor + self, render_data: Any, positions: wp.array, orientations: wp.array, intrinsics: wp.array ) -> None: """Update camera poses and intrinsics for the next render. Args: render_data: The render data object from :meth:`create_render_data`. - positions: Camera positions in world frame, shape ``(N, 3)``. - orientations: Camera orientations as quaternions (x, y, z, w), shape ``(N, 4)``. + positions: Camera positions [m] in world frame, shape ``(N, 3)``. + orientations: Camera orientations as quaternions ``(x, y, z, w)``, shape ``(N, 4)``. intrinsics: Camera intrinsic matrices, shape ``(N, 3, 3)``. """ pass diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py index c481002e524e..2ba26c672c73 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera.py @@ -26,6 +26,7 @@ create_rotation_matrix_from_view, quat_from_matrix, ) +from isaaclab.utils.warp.proxy_array import ProxyArray from ..sensor_base import SensorBase from .camera_data import CameraData, RenderBufferKind @@ -170,8 +171,13 @@ def data(self) -> CameraData: return self._data @property - def frame(self) -> torch.tensor: - """Frame number when the measurement took place.""" + def frame(self) -> ProxyArray: + """Frame number when the measurement took place. + + Shape is (N,), dtype ``wp.int64``. Use ``.torch`` for a zero-copy + :class:`torch.Tensor` view or ``.warp`` for the underlying + :class:`warp.array`. + """ return self._frame @property @@ -184,7 +190,10 @@ def image_shape(self) -> tuple[int, int]: """ def set_intrinsic_matrices( - self, matrices: torch.Tensor, focal_length: float | None = None, env_ids: Sequence[int] | None = None + self, + matrices: wp.array | ProxyArray | torch.Tensor | np.ndarray, + focal_length: float | None = None, + env_ids: Sequence[int] | None = None, ): """Set parameters of the USD camera from its intrinsic matrix. @@ -203,7 +212,10 @@ def set_intrinsic_matrices( is not true in the input intrinsic matrix, then the camera will not set up correctly. Args: - matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3). + matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3). A + :class:`warp.array` is the canonical input type; + :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`, + :class:`torch.Tensor`, and :class:`numpy.ndarray` are also accepted. focal_length: Perspective focal length (in cm) used to calculate pixel size. Defaults to None. If None, focal_length will be calculated 1 / width. env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices. @@ -211,7 +223,11 @@ def set_intrinsic_matrices( # resolve env_ids if env_ids is None: env_ids = self._ALL_INDICES - # convert matrices to numpy tensors + # normalize input: ProxyArray → wp.array → torch.Tensor → numpy + if isinstance(matrices, ProxyArray): + matrices = matrices.torch + elif isinstance(matrices, wp.array): + matrices = wp.to_torch(matrices) if isinstance(matrices, torch.Tensor): matrices = matrices.cpu().numpy() else: @@ -246,8 +262,8 @@ def set_intrinsic_matrices( def set_world_poses( self, - positions: torch.Tensor | None = None, - orientations: torch.Tensor | None = None, + positions: wp.array | ProxyArray | torch.Tensor | np.ndarray | None = None, + orientations: wp.array | ProxyArray | torch.Tensor | np.ndarray | None = None, env_ids: Sequence[int] | None = None, convention: Literal["opengl", "ros", "world"] = "ros", ): @@ -264,10 +280,13 @@ def set_world_poses( on the conventions. Args: - positions: The cartesian coordinates (in meters). Shape is (N, 3). - Defaults to None, in which case the camera position in not changed. + positions: The cartesian coordinates [m]. Shape is (N, 3). A :class:`warp.array` + is the canonical input type; :class:`ProxyArray`, :class:`torch.Tensor`, + and :class:`numpy.ndarray` are also accepted. Defaults to None, in which + case the camera position is not changed. orientations: The quaternion orientation in (x, y, z, w). Shape is (N, 4). - Defaults to None, in which case the camera orientation in not changed. + Same accepted types as ``positions``. Defaults to None, in which case + the camera orientation is not changed. env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices. convention: The convention in which the poses are fed. Defaults to "ros". @@ -277,18 +296,12 @@ def set_world_poses( # resolve env_ids if env_ids is None: env_ids = self._ALL_INDICES - # convert to backend tensor + # normalize positions to a torch tensor on the sensor device if positions is not None: - if isinstance(positions, np.ndarray): - positions = torch.from_numpy(positions).to(device=self._device) - elif not isinstance(positions, torch.Tensor): - positions = torch.tensor(positions, device=self._device) + positions = self._to_device_tensor(positions) # convert rotation matrix from input convention to OpenGL if orientations is not None: - if isinstance(orientations, np.ndarray): - orientations = torch.from_numpy(orientations).to(device=self._device) - elif not isinstance(orientations, torch.Tensor): - orientations = torch.tensor(orientations, device=self._device) + orientations = self._to_device_tensor(orientations) orientations = convert_camera_frame_orientation_convention(orientations, origin=convention, target="opengl") # convert torch tensors to warp arrays for the view pos_wp = wp.from_torch(positions.contiguous()) if positions is not None else None @@ -302,13 +315,19 @@ def set_world_poses( self._view.set_world_poses(pos_wp, ori_wp, idx_wp) def set_world_poses_from_view( - self, eyes: torch.Tensor, targets: torch.Tensor, env_ids: Sequence[int] | None = None + self, + eyes: wp.array | ProxyArray | torch.Tensor | np.ndarray, + targets: wp.array | ProxyArray | torch.Tensor | np.ndarray, + env_ids: Sequence[int] | None = None, ): """Set the poses of the camera from the eye position and look-at target position. Args: - eyes: The positions of the camera's eye. Shape is (N, 3). - targets: The target locations to look at. Shape is (N, 3). + eyes: The positions of the camera's eye. Shape is (N, 3). A :class:`warp.array` + is the canonical input type; :class:`ProxyArray`, :class:`torch.Tensor`, + and :class:`numpy.ndarray` are also accepted. + targets: The target locations to look at. Shape is (N, 3). Same accepted types + as ``eyes``. env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices. Raises: @@ -318,6 +337,9 @@ def set_world_poses_from_view( # resolve env_ids if env_ids is None: env_ids = self._ALL_INDICES + # normalize inputs to torch tensors on the sensor device + eyes = self._to_device_tensor(eyes) + targets = self._to_device_tensor(targets) # get up axis of current stage up_axis = UsdGeom.GetStageUpAxis(self.stage) # set camera poses using the view @@ -346,8 +368,8 @@ def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None # reset the data # note: this recomputation is useful if one performs events such as randomizations on the camera poses. self._update_poses(env_ids) - # Reset the frame count - self._frame[env_ids] = 0 + # Reset the frame count (ProxyArray; mutate via the zero-copy torch view) + self._frame.torch[env_ids] = 0 """ Implementation. @@ -390,8 +412,10 @@ def _initialize_impl(self): # Create all env_ids buffer self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long) - # Create frame count buffer - self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long) + # Create frame count buffer (wp.array primary, ProxyArray-published). + self._frame = ProxyArray( + wp.from_torch(torch.zeros(self._view.count, device=self._device, dtype=torch.long).contiguous()) + ) # Convert all encapsulated prims to Camera for cam_prim in self._view.prims: @@ -427,8 +451,8 @@ def _update_buffers_impl(self, env_mask: wp.array): env_ids = wp.to_torch(env_mask).nonzero(as_tuple=False).squeeze(-1) if len(env_ids) == 0: return - # Increment frame count - self._frame[env_ids] += 1 + # Increment frame count (ProxyArray; mutate via the zero-copy torch view) + self._frame.torch[env_ids] += 1 # update latest camera pose if requested if self.cfg.update_latest_camera_pose: self._update_poses(env_ids) @@ -451,6 +475,23 @@ def _update_buffers_impl(self, env_mask: wp.array): Private Helpers """ + def _to_device_tensor(self, value: wp.array | ProxyArray | torch.Tensor | np.ndarray) -> torch.Tensor: + """Normalize an array-like input to a ``torch.Tensor`` on this sensor's device. + + Accepts :class:`warp.array`, :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`, + :class:`torch.Tensor`, and :class:`numpy.ndarray`. Warp inputs are unwrapped via + ``wp.to_torch`` (zero-copy). + """ + if isinstance(value, ProxyArray): + value = value.torch + elif isinstance(value, wp.array): + value = wp.to_torch(value) + if isinstance(value, np.ndarray): + return torch.from_numpy(value).to(device=self._device) + if not isinstance(value, torch.Tensor): + return torch.tensor(value, device=self._device) + return value.to(device=self._device) + def _check_supported_data_types(self, cfg: CameraCfg): """Checks if the data types are supported by the ray-caster camera.""" # check if there is any intersection in unsupported types @@ -501,12 +542,22 @@ def _create_buffers(self): ) # Camera-frame state (pose / intrinsics) is owned by the camera, not # the renderer: populate it on the freshly constructed ``CameraData``. - self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device) + # Each field stores a wp.array primary buffer (allocated via wp.from_torch + # over a torch.zeros allocation) and is published as a ProxyArray. + self._data.intrinsic_matrices = ProxyArray( + wp.from_torch(torch.zeros((self._view.count, 3, 3), device=self._device).contiguous()) + ) self._update_intrinsic_matrices(self._ALL_INDICES) - self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device) - self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device) + self._data.pos_w = ProxyArray( + wp.from_torch(torch.zeros((self._view.count, 3), device=self._device).contiguous()) + ) + self._data.quat_w_world = ProxyArray( + wp.from_torch(torch.zeros((self._view.count, 4), device=self._device).contiguous()) + ) self._update_poses(self._ALL_INDICES) - self._renderer.set_outputs(self._render_data, self._data.output) + # Hand the renderer the underlying wp.array buffers; the ProxyArray + # wrappers in CameraData stay valid since both views share memory. + self._renderer.set_outputs(self._render_data, {name: proxy.warp for name, proxy in self._data.output.items()}) def _update_intrinsic_matrices(self, env_ids: Sequence[int]): """Compute camera's matrix of intrinsic parameters. @@ -517,6 +568,9 @@ def _update_intrinsic_matrices(self, env_ids: Sequence[int]): The calibration matrix projects points in the 3D scene onto an imaginary screen of the camera. The coordinates of points on the image plane are in the homogeneous representation. """ + # zero-copy torch view of the wp.array buffer; in-place mutation is + # visible through both views. + intrinsic_view = self._data.intrinsic_matrices.torch # iterate over all cameras for i in env_ids: # Get corresponding sensor prim @@ -534,11 +588,11 @@ def _update_intrinsic_matrices(self, env_ids: Sequence[int]): c_x = width * 0.5 c_y = height * 0.5 # create intrinsic matrix for depth linear - self._data.intrinsic_matrices[i, 0, 0] = f_x - self._data.intrinsic_matrices[i, 0, 2] = c_x - self._data.intrinsic_matrices[i, 1, 1] = f_y - self._data.intrinsic_matrices[i, 1, 2] = c_y - self._data.intrinsic_matrices[i, 2, 2] = 1 + intrinsic_view[i, 0, 0] = f_x + intrinsic_view[i, 0, 2] = c_x + intrinsic_view[i, 1, 1] = f_y + intrinsic_view[i, 1, 2] = c_y + intrinsic_view[i, 2, 2] = 1 def _update_poses(self, env_ids: Sequence[int]): """Computes the pose of the camera in the world frame with ROS convention. @@ -558,14 +612,19 @@ def _update_poses(self, env_ids: Sequence[int]): env_ids = torch.tensor(env_ids, dtype=torch.int32, device=self._device) indices = wp.from_torch(env_ids.to(dtype=torch.int32), dtype=wp.int32) if env_ids is not None else None pos_w, quat_w = self._view.get_world_poses(indices) - self._data.pos_w[env_ids] = pos_w.torch - self._data.quat_w_world[env_ids] = convert_camera_frame_orientation_convention( + # Mutate via the zero-copy torch view to avoid the ProxyArray.__setitem__ + # deprecation warning (the view shares memory with the wp.array primary). + self._data.pos_w.torch[env_ids] = pos_w.torch + self._data.quat_w_world.torch[env_ids] = convert_camera_frame_orientation_convention( quat_w.torch, origin="opengl", target="world" ) # notify renderer of updated poses (guarded in case called before initialization completes) if self._render_data is not None: self._renderer.update_camera( - self._render_data, self._data.pos_w, self._data.quat_w_world, self._data.intrinsic_matrices + self._render_data, + self._data.pos_w.warp, + self._data.quat_w_world.warp, + self._data.intrinsic_matrices.warp, ) """ diff --git a/source/isaaclab/isaaclab/sensors/camera/camera_data.py b/source/isaaclab/isaaclab/sensors/camera/camera_data.py index 6ec5484531b2..40595e40d1c5 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera_data.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera_data.py @@ -9,29 +9,41 @@ from typing import Any import torch +import warp as wp # Re-exported as part of the public isaaclab.sensors.camera API from isaaclab.renderers.output_contract import RenderBufferKind, RenderBufferSpec from isaaclab.utils.math import convert_camera_frame_orientation_convention +from isaaclab.utils.warp.proxy_array import ProxyArray __all__ = ["CameraData", "RenderBufferKind", "RenderBufferSpec"] +def _wrap(buf: torch.Tensor) -> ProxyArray: + """Wrap a torch buffer as a :class:`ProxyArray` with a zero-copy ``wp.array`` view.""" + return ProxyArray(wp.from_torch(buf)) + + @dataclass class CameraData: - """Data container for the camera sensor.""" + """Data container for the camera sensor. + + All array-valued fields are :class:`~isaaclab.utils.warp.proxy_array.ProxyArray` + wrappers around an underlying :class:`warp.array` (Warp-primary storage with a + cached zero-copy ``torch.Tensor`` view available via ``.torch``). + """ ## # Frame state. ## - pos_w: torch.Tensor = None - """Position of the sensor origin in world frame, following ROS convention. + pos_w: ProxyArray = None + """Position [m] of the sensor origin in world frame, following ROS convention. Shape is (N, 3) where N is the number of sensors. """ - quat_w_world: torch.Tensor = None + quat_w_world: ProxyArray = None """Quaternion orientation `(x, y, z, w)` of the sensor origin in world frame, following the world coordinate frame .. note:: @@ -47,13 +59,13 @@ class CameraData: image_shape: tuple[int, int] = None """A tuple containing (height, width) of the camera sensor.""" - intrinsic_matrices: torch.Tensor = None + intrinsic_matrices: ProxyArray = None """The intrinsic matrices for the camera. Shape is (N, 3, 3) where N is the number of sensors. """ - output: dict[str, torch.Tensor] = None + output: dict[str, ProxyArray] = None """The retrieved sensor data with sensor types as key. The format of the data is available in the `Replicator Documentation`_. For semantic-based data, @@ -82,9 +94,11 @@ def allocate( ) -> CameraData: """Build a :class:`CameraData` with output buffers pre-allocated. - Allocates one ``(num_views, height, width, channels)`` tensor per kind + Allocates one ``(num_views, height, width, channels)`` buffer per kind in the intersection of ``data_types`` and ``supported_specs``, using - the channels and dtype from each :class:`RenderBufferSpec`. + the channels and dtype from each :class:`RenderBufferSpec`. Each buffer + is exposed as a :class:`ProxyArray` (``wp.array`` primary, zero-copy + ``torch.Tensor`` view via ``.torch``). Args: data_types: Requested output names (typically :attr:`CameraCfg.data_types`). @@ -124,19 +138,28 @@ def allocate( if rgb_alias: requested.update({RenderBufferKind.RGB, RenderBufferKind.RGBA}) - buffers: dict[str, torch.Tensor] = {} + # Allocate the underlying torch buffers first so we can preserve the + # rgb-as-view-of-rgba alias as a torch slice (shared memory). Each + # buffer is then wrapped as a ProxyArray over a wp.from_torch view — + # the wp.array and torch.Tensor share the same allocation. + torch_buffers: dict[str, torch.Tensor] = {} for name, spec in supported_specs.items(): if name not in requested: continue if rgb_alias and name == RenderBufferKind.RGB: continue - buffers[str(name)] = torch.zeros( + torch_buffers[str(name)] = torch.zeros( (num_views, height, width, spec.channels), dtype=spec.dtype, device=device, ).contiguous() if rgb_alias: - buffers[str(RenderBufferKind.RGB)] = buffers[str(RenderBufferKind.RGBA)][..., :3] + # Share storage with rgba: the slice is a non-contiguous torch view. + # ``wp.from_torch`` accepts strided views, so the resulting ProxyArray + # tracks the same underlying memory as the rgba buffer. + torch_buffers[str(RenderBufferKind.RGB)] = torch_buffers[str(RenderBufferKind.RGBA)][..., :3] + + buffers: dict[str, ProxyArray] = {name: _wrap(buf) for name, buf in torch_buffers.items()} return cls( image_shape=(height, width), @@ -149,7 +172,7 @@ def allocate( ## @property - def quat_w_ros(self) -> torch.Tensor: + def quat_w_ros(self) -> ProxyArray: """Quaternion orientation `(x, y, z, w)` of the sensor origin in the world frame, following ROS convention. .. note:: @@ -157,10 +180,11 @@ def quat_w_ros(self) -> torch.Tensor: Shape is (N, 4) where N is the number of sensors. """ - return convert_camera_frame_orientation_convention(self.quat_w_world, origin="world", target="ros") + converted = convert_camera_frame_orientation_convention(self.quat_w_world.torch, origin="world", target="ros") + return _wrap(converted.contiguous()) @property - def quat_w_opengl(self) -> torch.Tensor: + def quat_w_opengl(self) -> ProxyArray: """Quaternion orientation `(x, y, z, w)` of the sensor origin in the world frame, following Opengl / USD Camera convention. @@ -169,4 +193,7 @@ def quat_w_opengl(self) -> torch.Tensor: Shape is (N, 4) where N is the number of sensors. """ - return convert_camera_frame_orientation_convention(self.quat_w_world, origin="world", target="opengl") + converted = convert_camera_frame_orientation_convention( + self.quat_w_world.torch, origin="world", target="opengl" + ) + return _wrap(converted.contiguous()) diff --git a/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera.py b/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera.py index a14c16f3d0d9..a706385e6fb4 100644 --- a/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera.py +++ b/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera.py @@ -12,6 +12,7 @@ import isaaclab.utils.math as math_utils from isaaclab.utils.warp import kernels as warp_kernels +from isaaclab.utils.warp.proxy_array import ProxyArray from .kernels import ( CAMERA_RAYCAST_MAX_DIST, @@ -84,9 +85,11 @@ def _initialize_warp_meshes(self): def _create_buffers(self): super()._create_buffers() - self._data.image_mesh_ids = torch.zeros( + # image_mesh_ids primary buffer: torch.zeros + wp.from_torch share memory. + image_mesh_ids_torch = torch.zeros( self._num_envs, *self.image_shape, 1, device=self.device, dtype=torch.int16 - ) + ).contiguous() + self._data.image_mesh_ids = ProxyArray(wp.from_torch(image_mesh_ids_torch)) def _initialize_rays_impl(self): # NOTE: This method intentionally does NOT call super()._initialize_rays_impl() through the MRO @@ -99,7 +102,10 @@ def _initialize_rays_impl(self): # Camera-specific bookkeeping buffers self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long) - self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long) + # Frame count: wp.array primary, ProxyArray-published. + self._frame = ProxyArray( + wp.from_torch(torch.zeros(self._view.count, device=self._device, dtype=torch.long).contiguous()) + ) # Build camera output buffers (intrinsics, image data, etc.) self._create_buffers() @@ -107,7 +113,7 @@ def _initialize_rays_impl(self): # Compute local ray starts/directions from the camera pattern (torch, init-time only) ray_starts_local, ray_directions_local = self.cfg.pattern_cfg.func( - self.cfg.pattern_cfg, self._data.intrinsic_matrices, self._device + self.cfg.pattern_cfg, self._data.intrinsic_matrices.torch, self._device ) self.num_rays = ray_directions_local.shape[1] @@ -124,9 +130,8 @@ def _initialize_rays_impl(self): self._offset_quat = quat_offset.repeat(self._view.count, 1) self._offset_pos = torch.tensor(list(self.cfg.offset.pos), device=self._device).repeat(self._view.count, 1) - # Camera pose buffers (torch, part of CameraData) - self._data.pos_w = torch.zeros(self._view.count, 3, device=self._device) - self._data.quat_w_world = torch.zeros(self._view.count, 4, device=self._device) + # Camera pose buffers (pos_w / quat_w_world) were already allocated as + # ProxyArrays in self._create_buffers() above; no re-init needed here. # Warp-backed camera orientation buffer for warp kernel calls; # updated from self._data.quat_w_world in _update_ray_infos. self._quat_w_wp = wp.zeros(self._view.count, dtype=wp.quatf, device=self._device) @@ -183,9 +188,9 @@ def _update_ray_infos(self, env_mask: wp.array): pos_w, quat_w = math_utils.combine_frame_transforms( pos_w, quat_w, self._offset_pos[env_ids], self._offset_quat[env_ids] ) - # Store camera pose in CameraData (torch tensors) and warp-backed orientation buffer - self._data.pos_w[env_ids] = pos_w - self._data.quat_w_world[env_ids] = quat_w + # Store camera pose in CameraData via the zero-copy torch view of the wp.array primary + self._data.pos_w.torch[env_ids] = pos_w + self._data.quat_w_world.torch[env_ids] = quat_w self._quat_w_wp_torch[env_ids] = quat_w # Rotate local ray starts and directions into world frame using full camera orientation @@ -211,8 +216,8 @@ def _update_buffers_impl(self, env_mask: wp.array): self._update_ray_infos(env_mask) - # Increment frame count for updated environments - self._frame[env_ids] += 1 + # Increment frame count for updated environments (ProxyArray; mutate via .torch) + self._frame.torch[env_ids] += 1 self._update_mesh_transforms() @@ -275,19 +280,23 @@ def _update_buffers_impl(self, env_mask: wp.array): # Apply depth clipping on the intermediate buffer (leaves _ray_distance_cam_w unmodified) self._apply_depth_clipping(env_mask, self._distance_to_image_plane_wp) d2ip_torch = wp.to_torch(self._distance_to_image_plane_wp) - self._data.output["distance_to_image_plane"][env_ids] = d2ip_torch[env_ids].view(-1, *self.image_shape, 1) + self._data.output["distance_to_image_plane"].torch[env_ids] = d2ip_torch[env_ids].view( + -1, *self.image_shape, 1 + ) if "distance_to_camera" in self.cfg.data_types: # d2ip (if requested) was computed before this block so _ray_distance_cam_w is still unclipped. self._apply_depth_clipping(env_mask, self._ray_distance_cam_w) ray_dist_torch = wp.to_torch(self._ray_distance_cam_w) - self._data.output["distance_to_camera"][env_ids] = ray_dist_torch[env_ids].view(-1, *self.image_shape, 1) + self._data.output["distance_to_camera"].torch[env_ids] = ray_dist_torch[env_ids].view( + -1, *self.image_shape, 1 + ) if return_normal: ray_normal_torch = wp.to_torch(self._ray_normal_w) - self._data.output["normals"][env_ids] = ray_normal_torch[env_ids].view(-1, *self.image_shape, 3) + self._data.output["normals"].torch[env_ids] = ray_normal_torch[env_ids].view(-1, *self.image_shape, 3) if self.cfg.update_mesh_ids: - self._data.image_mesh_ids[env_ids] = wp.to_torch(self._ray_mesh_id_w)[env_ids].view( + self._data.image_mesh_ids.torch[env_ids] = wp.to_torch(self._ray_mesh_id_w)[env_ids].view( -1, *self.image_shape, 1 ) diff --git a/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera_data.py b/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera_data.py index 21338f0a0616..8c90e77b7ddf 100644 --- a/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera_data.py +++ b/source/isaaclab/isaaclab/sensors/ray_caster/multi_mesh_ray_caster_camera_data.py @@ -5,9 +5,8 @@ """Data container for the multi-mesh ray-cast camera sensor.""" -import torch - from isaaclab.sensors.camera import CameraData +from isaaclab.utils.warp.proxy_array import ProxyArray class MultiMeshRayCasterCameraData(CameraData): @@ -15,13 +14,13 @@ class MultiMeshRayCasterCameraData(CameraData): This class extends :class:`CameraData` with additional mesh-id information. It does not inherit from :class:`RayCasterData` because the camera variant - manages its own torch-based pose and hit buffers independently from the + manages its own pose and hit buffers independently from the warp-native :class:`RayCasterData`. """ - image_mesh_ids: torch.Tensor = None + image_mesh_ids: ProxyArray = None """The mesh ids of the image pixels. Shape is (N, H, W, 1), where N is the number of sensors, H and W are the height and width of the image, - and 1 is the number of mesh ids per pixel. + and 1 is the number of mesh ids per pixel. Stored as a ``wp.array`` of dtype ``wp.int16``. """ diff --git a/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py b/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py index 257b25698deb..56a8078c78b1 100644 --- a/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py +++ b/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py @@ -17,6 +17,7 @@ import isaaclab.utils.math as math_utils from isaaclab.sensors.camera import CameraData from isaaclab.utils.warp.kernels import raycast_mesh_masked_kernel +from isaaclab.utils.warp.proxy_array import ProxyArray from .kernels import ( ALIGNMENT_BASE, @@ -36,6 +37,19 @@ logger = logging.getLogger(__name__) +def _to_device_tensor(value: wp.array | ProxyArray | torch.Tensor, device: str | torch.device) -> torch.Tensor: + """Normalize an array-like input to a ``torch.Tensor`` on ``device``. + + Accepts :class:`warp.array`, :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`, + and :class:`torch.Tensor`. Warp inputs are unwrapped via ``wp.to_torch`` (zero-copy). + """ + if isinstance(value, ProxyArray): + value = value.torch + elif isinstance(value, wp.array): + value = wp.to_torch(value) + return value.to(device=device) + + class RayCasterCamera(RayCaster): """A ray-casting camera sensor. @@ -123,8 +137,13 @@ def image_shape(self) -> tuple[int, int]: return (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width) @property - def frame(self) -> torch.tensor: - """Frame number when the measurement took place.""" + def frame(self) -> ProxyArray: + """Frame number when the measurement took place. + + Shape is (N,), dtype ``wp.int64``. Use ``.torch`` for a zero-copy + :class:`torch.Tensor` view or ``.warp`` for the underlying + :class:`warp.array`. + """ return self._frame """ @@ -132,24 +151,32 @@ def frame(self) -> torch.tensor: """ def set_intrinsic_matrices( - self, matrices: torch.Tensor, focal_length: float = 1.0, env_ids: Sequence[int] | None = None + self, + matrices: wp.array | ProxyArray | torch.Tensor, + focal_length: float = 1.0, + env_ids: Sequence[int] | None = None, ): """Set the intrinsic matrix of the camera. Args: - matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3). + matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3). A + :class:`warp.array` is the canonical input type; :class:`ProxyArray` and + :class:`torch.Tensor` are also accepted. focal_length: Focal length to use when computing aperture values (in cm). Defaults to 1.0. env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices. """ # resolve env_ids if env_ids is None: env_ids = slice(None) - # save new intrinsic matrices and focal length - self._data.intrinsic_matrices[env_ids] = matrices.to(self._device) + # normalize matrices input to torch on sensor device + matrices = _to_device_tensor(matrices, self._device) + # save new intrinsic matrices and focal length (write via the zero-copy torch view) + intrinsic_view = self._data.intrinsic_matrices.torch + intrinsic_view[env_ids] = matrices self._focal_length = focal_length # recompute ray directions self.ray_starts[env_ids], self.ray_directions[env_ids] = self.cfg.pattern_cfg.func( - self.cfg.pattern_cfg, self._data.intrinsic_matrices[env_ids], self._device + self.cfg.pattern_cfg, intrinsic_view[env_ids], self._device ) # Refresh warp views of local ray buffers; .contiguous() may produce a copy so we store # the contiguous tensors explicitly to prevent GC while the warp views are alive. @@ -176,15 +203,15 @@ def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None pos_w, quat_w = math_utils.combine_frame_transforms( pos_w.torch.clone(), quat_w.torch.clone(), self._offset_pos[env_ids], self._offset_quat[env_ids] ) - self._data.pos_w[env_ids] = pos_w - self._data.quat_w_world[env_ids] = quat_w - # Reset the frame count - self._frame[env_ids] = 0 + self._data.pos_w.torch[env_ids] = pos_w + self._data.quat_w_world.torch[env_ids] = quat_w + # Reset the frame count (ProxyArray; mutate via the zero-copy torch view) + self._frame.torch[env_ids] = 0 def set_world_poses( self, - positions: torch.Tensor | None = None, - orientations: torch.Tensor | None = None, + positions: wp.array | ProxyArray | torch.Tensor | None = None, + orientations: wp.array | ProxyArray | torch.Tensor | None = None, env_ids: Sequence[int] | None = None, convention: Literal["opengl", "ros", "world"] = "ros", ): @@ -201,10 +228,13 @@ def set_world_poses( on the conventions. Args: - positions: The cartesian coordinates (in meters). Shape is (N, 3). - Defaults to None, in which case the camera position in not changed. + positions: The cartesian coordinates [m]. Shape is (N, 3). A :class:`warp.array` + is the canonical input type; :class:`ProxyArray` and :class:`torch.Tensor` + are also accepted. Defaults to None, in which case the camera position is + not changed. orientations: The quaternion orientation in (x, y, z, w). Shape is (N, 4). - Defaults to None, in which case the camera orientation in not changed. + Same accepted types as ``positions``. Defaults to None, in which case + the camera orientation is not changed. env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices. convention: The convention in which the poses are fed. Defaults to "ros". @@ -221,10 +251,12 @@ def set_world_poses( pos_w_torch = pos_w.torch quat_w_torch = quat_w.torch if positions is not None: + positions = _to_device_tensor(positions, self._device) # transform to camera frame pos_offset_world_frame = positions - pos_w_torch self._offset_pos[env_ids] = math_utils.quat_apply(math_utils.quat_inv(quat_w_torch), pos_offset_world_frame) if orientations is not None: + orientations = _to_device_tensor(orientations, self._device) # convert rotation matrix from input convention to world quat_w_set = math_utils.convert_camera_frame_orientation_convention( orientations, origin=convention, target="world" @@ -236,23 +268,31 @@ def set_world_poses( pos_w_out, quat_w_out = math_utils.combine_frame_transforms( pos_w2.torch.clone(), quat_w2.torch.clone(), self._offset_pos[env_ids], self._offset_quat[env_ids] ) - self._data.pos_w[env_ids] = pos_w_out - self._data.quat_w_world[env_ids] = quat_w_out + self._data.pos_w.torch[env_ids] = pos_w_out + self._data.quat_w_world.torch[env_ids] = quat_w_out def set_world_poses_from_view( - self, eyes: torch.Tensor, targets: torch.Tensor, env_ids: Sequence[int] | None = None + self, + eyes: wp.array | ProxyArray | torch.Tensor, + targets: wp.array | ProxyArray | torch.Tensor, + env_ids: Sequence[int] | None = None, ): """Set the poses of the camera from the eye position and look-at target position. Args: - eyes: The positions of the camera's eye. Shape is (N, 3). - targets: The target locations to look at. Shape is (N, 3). + eyes: The positions of the camera's eye. Shape is (N, 3). A :class:`warp.array` + is the canonical input type; :class:`ProxyArray` and :class:`torch.Tensor` + are also accepted. + targets: The target locations to look at. Shape is (N, 3). Same accepted types + as ``eyes``. env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices. Raises: RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first. NotImplementedError: If the stage up-axis is not "Y" or "Z". """ + eyes = _to_device_tensor(eyes, self._device) + targets = _to_device_tensor(targets, self._device) # get up axis of current stage up_axis = UsdGeom.GetStageUpAxis(self.stage) # camera position and rotation in opengl convention @@ -268,15 +308,17 @@ def set_world_poses_from_view( def _initialize_rays_impl(self): # Create all indices buffer self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long) - # Create frame count buffer - self._frame = torch.zeros(self._view.count, device=self._device, dtype=torch.long) + # Create frame count buffer (wp.array primary, ProxyArray-published). + self._frame = ProxyArray( + wp.from_torch(torch.zeros(self._view.count, device=self._device, dtype=torch.long).contiguous()) + ) # create buffers self._create_buffers() # compute intrinsic matrices self._compute_intrinsic_matrices() - # compute ray starts and directions + # compute ray starts and directions (pattern_cfg.func expects a torch.Tensor) self.ray_starts, self.ray_directions = self.cfg.pattern_cfg.func( - self.cfg.pattern_cfg, self._data.intrinsic_matrices, self._device + self.cfg.pattern_cfg, self._data.intrinsic_matrices.torch, self._device ) self.num_rays = self.ray_directions.shape[1] @@ -349,8 +391,8 @@ def _update_buffers_impl(self, env_mask: wp.array): env_ids = wp.to_torch(env_mask).nonzero(as_tuple=False).squeeze(-1) if len(env_ids) == 0: return - # increment frame count - self._frame[env_ids] += 1 + # increment frame count (ProxyArray; mutate via the zero-copy torch view) + self._frame.torch[env_ids] += 1 # Update world-frame ray starts/directions and camera pose via warp kernel. # Camera always uses ALIGNMENT_BASE (full orientation) and zero drift. @@ -378,9 +420,9 @@ def _update_buffers_impl(self, env_mask: wp.array): device=self._device, ) - # Write camera pose to CameraData (torch tensors) - self._data.pos_w[env_ids] = self._pos_w_torch[env_ids] - self._data.quat_w_world[env_ids] = self._quat_w_torch[env_ids] + # Write camera pose to CameraData via the zero-copy torch view of the wp.array primary + self._data.pos_w.torch[env_ids] = self._pos_w_torch[env_ids] + self._data.quat_w_world.torch[env_ids] = self._quat_w_torch[env_ids] # Fill ray hit positions with inf before raycasting wp.launch( @@ -447,19 +489,21 @@ def _update_buffers_impl(self, env_mask: wp.array): ) # Apply depth clipping on the intermediate buffer (leaves _ray_distance unmodified) self._apply_depth_clipping(env_mask, self._distance_to_image_plane_wp) - self._data.output["distance_to_image_plane"][env_ids] = self._distance_to_image_plane_torch[env_ids].view( - -1, *self.image_shape, 1 - ) + self._data.output["distance_to_image_plane"].torch[env_ids] = self._distance_to_image_plane_torch[ + env_ids + ].view(-1, *self.image_shape, 1) if "distance_to_camera" in self.cfg.data_types: # d2ip (if requested) was computed before this block so _ray_distance is still unclipped. self._apply_depth_clipping(env_mask, self._ray_distance) - self._data.output["distance_to_camera"][env_ids] = self._ray_distance_torch[env_ids].view( + self._data.output["distance_to_camera"].torch[env_ids] = self._ray_distance_torch[env_ids].view( -1, *self.image_shape, 1 ) if "normals" in self.cfg.data_types: - self._data.output["normals"][env_ids] = self._ray_normal_w_torch[env_ids].view(-1, *self.image_shape, 3) + self._data.output["normals"].torch[env_ids] = self._ray_normal_w_torch[env_ids].view( + -1, *self.image_shape, 3 + ) def _debug_vis_callback(self, event): # in case it crashes be safe @@ -525,17 +569,28 @@ def _check_supported_data_types(self, cfg: RayCasterCameraCfg): ) def _create_buffers(self): - """Create buffers for storing data.""" + """Create buffers for storing data. + + Each :class:`CameraData` field stores a ``wp.array`` primary buffer + (allocated via ``wp.from_torch`` over a contiguous ``torch.zeros`` tensor) + and is exposed as a :class:`ProxyArray`. The torch view is the canonical + write target for in-place updates from python code. + """ # prepare drift (kept as torch tensors so subclasses may use torch indexing) self.drift = torch.zeros(self._view.count, 3, device=self.device) self.ray_cast_drift = torch.zeros(self._view.count, 3, device=self.device) # create the data object # -- pose of the cameras - self._data.pos_w = torch.zeros((self._view.count, 3), device=self._device) - self._data.quat_w_world = torch.zeros((self._view.count, 4), device=self._device) + self._data.pos_w = ProxyArray( + wp.from_torch(torch.zeros((self._view.count, 3), device=self._device).contiguous()) + ) + self._data.quat_w_world = ProxyArray( + wp.from_torch(torch.zeros((self._view.count, 4), device=self._device).contiguous()) + ) # -- intrinsic matrix - self._data.intrinsic_matrices = torch.zeros((self._view.count, 3, 3), device=self._device) - self._data.intrinsic_matrices[:, 2, 2] = 1.0 + intrinsic_torch = torch.zeros((self._view.count, 3, 3), device=self._device).contiguous() + intrinsic_torch[:, 2, 2] = 1.0 + self._data.intrinsic_matrices = ProxyArray(wp.from_torch(intrinsic_torch)) self._data.image_shape = self.image_shape # -- output data # create the buffers to store the annotator data. @@ -548,8 +603,9 @@ def _create_buffers(self): shape = (self.cfg.pattern_cfg.height, self.cfg.pattern_cfg.width, 3) else: raise ValueError(f"Received unknown data type: {name}. Please check the configuration.") - # allocate tensor to store the data - self._data.output[name] = torch.zeros((self._view.count, *shape), device=self._device) + # allocate buffer (torch.zeros + wp.from_torch shares memory) and wrap as ProxyArray + buf = torch.zeros((self._view.count, *shape), device=self._device).contiguous() + self._data.output[name] = ProxyArray(wp.from_torch(buf)) def _compute_intrinsic_matrices(self): """Computes the intrinsic matrices for the camera based on the config provided.""" @@ -566,11 +622,12 @@ def _compute_intrinsic_matrices(self): f_y = pattern_cfg.height * pattern_cfg.focal_length / pattern_cfg.vertical_aperture c_x = pattern_cfg.horizontal_aperture_offset * f_x + pattern_cfg.width / 2 c_y = pattern_cfg.vertical_aperture_offset * f_y + pattern_cfg.height / 2 - # allocate the intrinsic matrices - self._data.intrinsic_matrices[:, 0, 0] = f_x - self._data.intrinsic_matrices[:, 0, 2] = c_x - self._data.intrinsic_matrices[:, 1, 1] = f_y - self._data.intrinsic_matrices[:, 1, 2] = c_y + # write into the zero-copy torch view of the wp.array primary buffer + intrinsic_view = self._data.intrinsic_matrices.torch + intrinsic_view[:, 0, 0] = f_x + intrinsic_view[:, 0, 2] = c_x + intrinsic_view[:, 1, 1] = f_y + intrinsic_view[:, 1, 2] = c_y # save focal length self._focal_length = pattern_cfg.focal_length diff --git a/source/isaaclab/test/renderers/test_camera_output_contract.py b/source/isaaclab/test/renderers/test_camera_output_contract.py index 2d6087d29708..208a814a796a 100644 --- a/source/isaaclab/test/renderers/test_camera_output_contract.py +++ b/source/isaaclab/test/renderers/test_camera_output_contract.py @@ -158,11 +158,13 @@ def test_camera_data_allocates_supported_subset_and_aliases_rgb(): ) assert set(data.output.keys()) == {"rgba", "rgb", "depth"} - assert data.output["rgba"].shape == (2, 8, 16, 4) - assert data.output["rgba"].dtype == torch.uint8 - assert data.output["depth"].shape == (2, 8, 16, 1) - assert data.output["depth"].dtype == torch.float32 - assert data.output["rgb"].data_ptr() == data.output["rgba"].data_ptr() + # ProxyArray exposes torch dtype/shape via .torch and the underlying wp.array via .warp. + assert tuple(data.output["rgba"].shape) == (2, 8, 16, 4) + assert data.output["rgba"].torch.dtype == torch.uint8 + assert tuple(data.output["depth"].shape) == (2, 8, 16, 1) + assert data.output["depth"].torch.dtype == torch.float32 + # rgb is a non-contiguous torch view of rgba; both ProxyArrays share the same allocation. + assert data.output["rgb"].torch.data_ptr() == data.output["rgba"].torch.data_ptr() assert data.image_shape == (8, 16) assert data.info == {"rgba": None, "rgb": None, "depth": None} @@ -206,10 +208,31 @@ def test_camera_data_segmentation_dtype_follows_supported_spec(): data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=colorized_specs ) - assert raw.output["instance_segmentation_fast"].dtype == torch.int32 - assert raw.output["instance_segmentation_fast"].shape == (1, 4, 4, 1) - assert colorized.output["instance_segmentation_fast"].dtype == torch.uint8 - assert colorized.output["instance_segmentation_fast"].shape == (1, 4, 4, 4) + assert raw.output["instance_segmentation_fast"].torch.dtype == torch.int32 + assert tuple(raw.output["instance_segmentation_fast"].shape) == (1, 4, 4, 1) + assert colorized.output["instance_segmentation_fast"].torch.dtype == torch.uint8 + assert tuple(colorized.output["instance_segmentation_fast"].shape) == (1, 4, 4, 4) + + +def test_camera_data_output_proxyarray_zero_copy(): + """Mutations through .torch are visible through .warp on every output buffer.""" + cfg = _make_camera_cfg(["rgb", "rgba", "depth"]) + specs = { + RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8), + RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8), + RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32), + } + data = CameraData.allocate( + data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=specs + ) + + # Write through the torch view; ensure the warp array sees the same memory. + rgba_proxy = data.output["rgba"] + rgba_proxy.torch[0, 0, 0, 0] = 7 + assert rgba_proxy.warp.numpy()[0, 0, 0, 0] == 7 + + # The rgb proxy is a torch view of the rgba memory, so the same write is visible. + assert data.output["rgb"].torch[0, 0, 0, 0].item() == 7 def test_camera_data_allocate_raises_on_unknown_name(): diff --git a/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py b/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py index 720a85223aa6..ab8bb659ae0f 100644 --- a/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py +++ b/source/isaaclab_contrib/isaaclab_contrib/sensors/tacsl_sensor/visuotactile_sensor.py @@ -224,10 +224,11 @@ def get_initial_render(self) -> dict | None: if initial_render is None: raise RuntimeError("Initial render is None") - # Store the initial nominal tactile data + # Store the initial nominal tactile data; output values are ProxyArray — + # snapshot the torch view to keep the existing torch.Tensor downstream contract. self._nominal_tactile = dict() for key, value in initial_render.items(): - self._nominal_tactile[key] = value.clone() + self._nominal_tactile[key] = value.torch.clone() return self._nominal_tactile @@ -583,7 +584,8 @@ def _update_camera_tactile(self, env_ids: Sequence[int] | slice): depth_key = "depth" if depth_key: - self._data.tactile_depth_image[env_ids] = camera_data.output[depth_key][env_ids].clone() + # camera_data.output[depth_key] is ProxyArray; index its torch view explicitly. + self._data.tactile_depth_image[env_ids] = camera_data.output[depth_key].torch[env_ids].clone() diff = self._nominal_tactile[depth_key][env_ids] - self._data.tactile_depth_image[env_ids] self._data.tactile_rgb_image[env_ids] = self._tactile_rgb_render.render(diff.squeeze(-1)) diff --git a/source/isaaclab_newton/changelog.d/camera-data-proxyarray.skip b/source/isaaclab_newton/changelog.d/camera-data-proxyarray.skip new file mode 100644 index 000000000000..7bb765d75772 --- /dev/null +++ b/source/isaaclab_newton/changelog.d/camera-data-proxyarray.skip @@ -0,0 +1,2 @@ +Internal-only update: NewtonWarpRenderer signatures realigned with the BaseRenderer +wp.array contract. No public API changes beyond what the isaaclab fragment covers. diff --git a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py index a02d820f2951..d6e314ed2d46 100644 --- a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py +++ b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py @@ -52,18 +52,18 @@ def __init__(self, newton_sensor: newton.sensors.SensorTiledCamera, spec: Camera self.width = getattr(spec.cfg, "width", 100) self.height = getattr(spec.cfg, "height", 100) - def set_outputs(self, output_data: dict[str, torch.Tensor]): - for output_name, tensor_data in output_data.items(): + def set_outputs(self, output_data: dict[str, wp.array]): + for output_name, wp_data in output_data.items(): if output_name == RenderBufferKind.RGBA: - self.outputs.color_image = self._from_torch(tensor_data, dtype=wp.uint32) + self.outputs.color_image = self._typed_view(wp_data, dtype=wp.uint32) elif output_name == RenderBufferKind.ALBEDO: - self.outputs.albedo_image = self._from_torch(tensor_data, dtype=wp.uint32) + self.outputs.albedo_image = self._typed_view(wp_data, dtype=wp.uint32) elif output_name == RenderBufferKind.DEPTH: - self.outputs.depth_image = self._from_torch(tensor_data, dtype=wp.float32) + self.outputs.depth_image = self._typed_view(wp_data, dtype=wp.float32) elif output_name == RenderBufferKind.NORMALS: - self.outputs.normals_image = self._from_torch(tensor_data, dtype=wp.vec3f) + self.outputs.normals_image = self._typed_view(wp_data, dtype=wp.vec3f) elif output_name == RenderBufferKind.INSTANCE_SEGMENTATION_FAST: - self.outputs.instance_segmentation_image = self._from_torch(tensor_data, dtype=wp.uint32) + self.outputs.instance_segmentation_image = self._typed_view(wp_data, dtype=wp.uint32) elif output_name == RenderBufferKind.RGB: pass else: @@ -82,9 +82,10 @@ def get_output(self, output_name: str) -> wp.array: return self.outputs.instance_segmentation_image return None - def update(self, positions: torch.Tensor, orientations: torch.Tensor, intrinsics: torch.Tensor): + def update(self, positions: wp.array, orientations: wp.array, intrinsics: wp.array): + # The convention helper expects a torch tensor; use the zero-copy view of the wp.array. converted_orientations = convert_camera_frame_orientation_convention( - orientations, origin="world", target="opengl" + wp.to_torch(orientations), origin="world", target="opengl" ) self.camera_transforms = wp.empty( @@ -98,29 +99,26 @@ def update(self, positions: torch.Tensor, orientations: torch.Tensor, intrinsics ) if self.camera_rays is None: - first_focal_length = intrinsics[:, 1, 1][0:1] + intrinsics_torch = wp.to_torch(intrinsics) + first_focal_length = intrinsics_torch[:, 1, 1][0:1] fov_radians_all = 2.0 * torch.atan(self.height / (2.0 * first_focal_length)) self.camera_rays = self.newton_sensor.utils.compute_pinhole_camera_rays( self.width, self.height, wp.from_torch(fov_radians_all, dtype=wp.float32) ) - def _from_torch(self, tensor: torch.Tensor, dtype) -> wp.array: - proxy_array = wp.from_torch(tensor) - if tensor.is_contiguous(): - return wp.array( - ptr=proxy_array.ptr, - dtype=dtype, - shape=(self.newton_sensor.model.world_count, self.num_cameras, self.height, self.width), - device=proxy_array.device, - copy=False, - ) + def _typed_view(self, wp_data: wp.array, dtype) -> wp.array: + """Reinterpret the output buffer as ``(world_count, num_cameras, height, width)`` of ``dtype``. - logger.warning("NewtonWarpRenderer - torch output array is non-contiguous") - return wp.zeros( - (self.newton_sensor.model.world_count, self.num_cameras, self.height, self.width), + The Newton sensor writes per-pixel typed records; the camera buffer is allocated as a + 4-D scalar array of the corresponding torch dtype. Both views share the same memory. + """ + return wp.array( + ptr=wp_data.ptr, dtype=dtype, - device=proxy_array.device, + shape=(self.newton_sensor.model.world_count, self.num_cameras, self.height, self.width), + device=wp_data.device, + copy=False, ) @wp.kernel @@ -202,7 +200,7 @@ def create_render_data(self, spec: CameraRenderSpec) -> RenderData: See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.create_render_data`.""" return RenderData(self.newton_sensor, spec) - def set_outputs(self, render_data: RenderData, output_data: dict[str, torch.Tensor]): + def set_outputs(self, render_data: RenderData, output_data: dict[str, wp.array]): """Store output buffers. See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs`.""" render_data.set_outputs(output_data) @@ -211,9 +209,7 @@ def update_transforms(self): See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_transforms`.""" SimulationContext.instance().update_scene_data_provider(True) - def update_camera( - self, render_data: RenderData, positions: torch.Tensor, orientations: torch.Tensor, intrinsics: torch.Tensor - ): + def update_camera(self, render_data: RenderData, positions: wp.array, orientations: wp.array, intrinsics: wp.array): """Update camera poses and intrinsics. See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_camera`.""" render_data.update(positions, orientations, intrinsics) @@ -241,9 +237,9 @@ def read_output(self, render_data: RenderData, camera_data: CameraData) -> None: continue image_data = render_data.get_output(output_name) if image_data is not None: - output_data = camera_data.output[output_name] - if image_data.ptr != output_data.data_ptr(): - wp.copy(wp.from_torch(output_data), image_data) + output_proxy = camera_data.output[output_name] + if image_data.ptr != output_proxy.warp.ptr: + wp.copy(output_proxy.warp, image_data) def cleanup(self, render_data: RenderData | None): """Release resources. No-op for Newton Warp. diff --git a/source/isaaclab_ov/changelog.d/camera-data-proxyarray.skip b/source/isaaclab_ov/changelog.d/camera-data-proxyarray.skip new file mode 100644 index 000000000000..e493955cdea2 --- /dev/null +++ b/source/isaaclab_ov/changelog.d/camera-data-proxyarray.skip @@ -0,0 +1,2 @@ +Internal-only update: OVRTXRenderer signatures realigned with the BaseRenderer +wp.array contract. No public API changes beyond what the isaaclab fragment covers. diff --git a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py index 5d1782373d87..624b984c690d 100644 --- a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py +++ b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py @@ -374,35 +374,36 @@ def create_render_data(self, spec: CameraRenderSpec) -> OVRTXRenderData: torch.int32: wp.int32, } - def set_outputs(self, render_data: OVRTXRenderData, output_data: dict[str, torch.Tensor]) -> None: - """Wrap caller-owned torch output tensors as zero-copy warp arrays. + def set_outputs(self, render_data: OVRTXRenderData, output_data: dict[str, wp.array]) -> None: + """Wrap caller-owned ``wp.array`` output buffers with the strict per-kind dtype OVRTX expects. Aliased views over a contiguous sibling (e.g. ``rgb`` over ``rgba``) are - skipped; any other non-contiguous tensor raises ``ValueError``. + skipped; any other non-contiguous array raises ``ValueError``. See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs`. """ render_data.warp_buffers = {} - for name, tensor in output_data.items(): - if not tensor.is_contiguous(): - if tensor.data_ptr() in {t.data_ptr() for t in output_data.values() if t.is_contiguous()}: + contiguous_ptrs = {arr.ptr for arr in output_data.values() if arr.is_contiguous} + for name, arr in output_data.items(): + if not arr.is_contiguous: + if arr.ptr in contiguous_ptrs: continue raise ValueError( f"OVRTXRenderer.set_outputs: output '{name}' is non-contiguous and is not an" - " alias of a contiguous output tensor; cannot wrap as a zero-copy warp array." + " alias of a contiguous output buffer; cannot rewrap with the OVRTX dtype." ) - wp_dtype = self._TORCH_TO_WP_DTYPE.get(tensor.dtype) + torch_dtype = wp.to_torch(arr).dtype + wp_dtype = self._TORCH_TO_WP_DTYPE.get(torch_dtype) if wp_dtype is None: raise ValueError( - f"OVRTXRenderer.set_outputs: unsupported torch dtype {tensor.dtype} for output" + f"OVRTXRenderer.set_outputs: unsupported dtype {torch_dtype} for output" f" '{name}'. Add it to OVRTXRenderer._TORCH_TO_WP_DTYPE." ) - torch_array = wp.from_torch(tensor) render_data.warp_buffers[name] = wp.array( - ptr=torch_array.ptr, + ptr=arr.ptr, dtype=wp_dtype, - shape=tuple(tensor.shape), - device=torch_array.device, + shape=tuple(arr.shape), + device=arr.device, copy=False, ) @@ -436,14 +437,19 @@ def update_transforms(self) -> None: def update_camera( self, render_data: OVRTXRenderData, - positions: torch.Tensor, - orientations: torch.Tensor, - intrinsics: torch.Tensor, + positions: wp.array, + orientations: wp.array, + intrinsics: wp.array, ) -> None: """Update camera transforms in OVRTX binding.""" num_envs = positions.shape[0] - camera_quats_opengl = convert_camera_frame_orientation_convention(orientations, origin="world", target="opengl") - camera_positions_wp = wp.from_torch(positions.contiguous(), dtype=wp.vec3) + # convention helper expects torch; use the zero-copy view of the wp.array + camera_quats_opengl = convert_camera_frame_orientation_convention( + wp.to_torch(orientations), origin="world", target="opengl" + ) + camera_positions_wp = wp.array( + ptr=positions.ptr, dtype=wp.vec3, shape=(positions.shape[0],), device=positions.device, copy=False + ) camera_orientations_wp = wp.from_torch(camera_quats_opengl.contiguous(), dtype=wp.quatf) camera_transforms = wp.zeros(num_envs, dtype=wp.mat44d, device=DEVICE) wp.launch( diff --git a/source/isaaclab_physx/changelog.d/camera-data-proxyarray.minor.rst b/source/isaaclab_physx/changelog.d/camera-data-proxyarray.minor.rst new file mode 100644 index 000000000000..31e7a21aa0a4 --- /dev/null +++ b/source/isaaclab_physx/changelog.d/camera-data-proxyarray.minor.rst @@ -0,0 +1,13 @@ +Changed +^^^^^^^ + +* **Breaking:** :class:`~isaaclab_physx.renderers.IsaacRtxRenderer` now writes + rendered tiles into ``dict[str, wp.array]`` output buffers (was + ``dict[str, torch.Tensor]``). + :meth:`~isaaclab_physx.renderers.IsaacRtxRenderer.set_outputs` and + :meth:`~isaaclab_physx.renderers.IsaacRtxRenderer.update_camera` accept + :class:`warp.array` arguments to match the new + :class:`~isaaclab.renderers.base_renderer.BaseRenderer` contract. The rgb + alias of rgba is preallocated as a torch view in + :meth:`~isaaclab.sensors.camera.CameraData.allocate` rather than rebound at + render time. diff --git a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py index 242ac3729d0b..10f90e53c958 100644 --- a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py +++ b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py @@ -72,7 +72,7 @@ class IsaacRtxRenderData: annotators: dict[str, Any] render_product_paths: list[str] - output_data: dict[str, torch.Tensor] | None = None + output_data: dict[str, wp.array] | None = None spec: CameraRenderSpec | None = None renderer_info: dict[str, Any] = field(default_factory=dict) @@ -277,7 +277,7 @@ def _resolve_simple_shading_mode(self, spec: CameraRenderSpec) -> int | None: ) return SIMPLE_SHADING_MODES[requested[0]] - def set_outputs(self, render_data: IsaacRtxRenderData, output_data: dict[str, torch.Tensor]): + def set_outputs(self, render_data: IsaacRtxRenderData, output_data: dict[str, wp.array]): """Store reference to output buffers for writing during render. See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.set_outputs`.""" render_data.output_data = output_data @@ -290,9 +290,9 @@ def update_transforms(self) -> None: def update_camera( self, render_data: IsaacRtxRenderData, - positions: torch.Tensor, - orientations: torch.Tensor, - intrinsics: torch.Tensor, + positions: wp.array, + orientations: wp.array, + intrinsics: wp.array, ): """No-op for Replicator - uses USD camera prims directly. See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.update_camera`.""" @@ -364,35 +364,38 @@ def tiling_grid_shape(): if data_type in SIMPLE_SHADING_MODES: tiled_data_buffer = tiled_data_buffer[:, :, :3].contiguous() + output_wp = output_data[data_type] wp.launch( kernel=reshape_tiled_image, dim=(view_count, cfg.height, cfg.width), inputs=[ tiled_data_buffer.flatten(), - wp.from_torch(output_data[data_type]), - *list(output_data[data_type].shape[1:]), + output_wp, + *list(output_wp.shape[1:]), num_tiles_x, ], device=device, ) - # alias rgb as first 3 channels of rgba - if data_type == "rgba" and "rgb" in cfg.data_types: - output_data["rgb"] = output_data["rgba"][..., :3] + # rgb is pre-allocated as a non-contiguous view of rgba in CameraData.allocate(), + # so the kernel write to rgba is automatically reflected in rgb. No rebind needed. # NOTE: The `distance_to_camera` annotator returns the distance to the camera optical center. # However, the replicator depth clipping is applied w.r.t. to the image plane which may result # in values larger than the clipping range in the output. We apply an additional clipping to # ensure values are within the clipping range for all the annotators. if data_type == "distance_to_camera": - output_data[data_type][output_data[data_type] > cfg.spawn.clipping_range[1]] = torch.inf + # Use the zero-copy torch view of the wp.array primary for the in-place clip. + view = wp.to_torch(output_wp) + view[view > cfg.spawn.clipping_range[1]] = torch.inf # apply defined clipping behavior if ( data_type in ("distance_to_camera", "distance_to_image_plane", "depth") and self.cfg.depth_clipping_behavior != "none" ): - output_data[data_type][torch.isinf(output_data[data_type])] = ( + view = wp.to_torch(output_wp) + view[torch.isinf(view)] = ( 0.0 if self.cfg.depth_clipping_behavior == "zero" else cfg.spawn.clipping_range[1] ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py index 807c0cc09d91..e2934acc8370 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/cartpole/cartpole_camera_env.py @@ -96,31 +96,33 @@ def _apply_action(self) -> None: def _get_observations(self) -> dict: data_type = self.cfg.tiled_camera.data_types[0] + # CameraData.output entries are ProxyArray; .torch yields a zero-copy torch view. + output_torch = self._tiled_camera.data.output[data_type].torch if "rgb" in self.cfg.tiled_camera.data_types: - camera_data = self._tiled_camera.data.output[data_type] / 255.0 + camera_data = output_torch / 255.0 # normalize the camera data for better training results mean_tensor = torch.mean(camera_data, dim=(1, 2), keepdim=True) camera_data -= mean_tensor elif "albedo" in self.cfg.tiled_camera.data_types: - camera_data = self._tiled_camera.data.output[data_type][..., :3] / 255.0 + camera_data = output_torch[..., :3] / 255.0 # normalize the camera data for better training results mean_tensor = torch.mean(camera_data, dim=(1, 2), keepdim=True) camera_data -= mean_tensor elif data_type in SIMPLE_SHADING_TYPES: - camera_data = self._tiled_camera.data.output[data_type] / 255.0 + camera_data = output_torch / 255.0 # normalize the camera data for better training results mean_tensor = torch.mean(camera_data, dim=(1, 2), keepdim=True) camera_data -= mean_tensor elif "depth" in self.cfg.tiled_camera.data_types: - camera_data = self._tiled_camera.data.output[data_type] + camera_data = output_torch camera_data[camera_data == float("inf")] = 0 elif "semantic_segmentation" in self.cfg.tiled_camera.data_types: - camera_data = self._tiled_camera.data.output[data_type] + camera_data = output_torch observations = {"policy": camera_data.clone()} if self.cfg.write_image_to_file: - save_images_to_file(self._tiled_camera.data.output[data_type] / 255.0, f"cartpole_{data_type}.png") + save_images_to_file(output_torch / 255.0, f"cartpole_{data_type}.png") return observations diff --git a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/shadow_hand_vision_env.py b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/shadow_hand_vision_env.py index 7da4db48e853..3e73839906db 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/shadow_hand_vision_env.py +++ b/source/isaaclab_tasks/isaaclab_tasks/direct/shadow_hand/shadow_hand_vision_env.py @@ -69,8 +69,11 @@ def _compute_image_observations(self): object_pose = torch.cat([self.object_pos, self.gt_keypoints.view(-1, 24)], dim=-1) # train CNN to regress on keypoint positions + # CameraData.output entries are ProxyArray; the feature extractor expects + # torch tensors, so unwrap each entry via .torch (zero-copy view). + camera_output_torch = {k: v.torch for k, v in self._tiled_camera.data.output.items()} pose_loss, embeddings = self.feature_extractor.step( - self._tiled_camera.data.output, + camera_output_torch, object_pose, ) diff --git a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py index fe666a7ecffe..b7fad5087aaa 100644 --- a/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py +++ b/source/isaaclab_tasks/isaaclab_tasks/manager_based/manipulation/dexsuite/mdp/observations.py @@ -215,7 +215,8 @@ def __init__(self, cfg, env: ManagerBasedRLEnv): def __call__( self, env: ManagerBasedRLEnv, sensor_cfg: SceneEntityCfg, normalize: bool = True ) -> torch.Tensor: # obtain the input image - images = self.sensor.data.output[self.sensor_type] + # CameraData.output entries are ProxyArray; .torch is a zero-copy view. + images = self.sensor.data.output[self.sensor_type].torch torch.nan_to_num_(images, nan=1e6) if normalize: images = self.norm_fn(images) From c5ba2a1b3ec71a9336a2d57d838284559708a143 Mon Sep 17 00:00:00 2001 From: Horde Date: Thu, 7 May 2026 21:07:41 +0000 Subject: [PATCH 2/3] Updates --- .../camera-data-proxyarray.minor.rst | 13 + .../isaaclab/renderers/output_contract.py | 12 +- .../isaaclab/sensors/camera/camera.py | 454 +++++++++++------- .../isaaclab/sensors/camera/camera_data.py | 77 +-- .../isaaclab/sensors/camera/kernels.py | 318 ++++++++++++ .../sensors/camera/orientation_conventions.py | 240 +++++++++ .../sensors/ray_caster/ray_caster_camera.py | 19 +- .../isaaclab/isaaclab/utils/warp/kernels.py | 26 + .../renderers/test_camera_output_contract.py | 25 +- .../renderers/newton_warp_renderer.py | 61 ++- .../isaaclab_ov/renderers/ovrtx_renderer.py | 100 ++-- .../renderers/isaac_rtx_renderer.py | 52 +- 12 files changed, 1069 insertions(+), 328 deletions(-) create mode 100644 source/isaaclab/isaaclab/sensors/camera/kernels.py create mode 100644 source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py diff --git a/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst b/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst index 080dff3cf95a..47a7cb797798 100644 --- a/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst +++ b/source/isaaclab/changelog.d/camera-data-proxyarray.minor.rst @@ -36,3 +36,16 @@ Changed ``wp.array | ProxyArray | torch.Tensor | numpy.ndarray`` inputs, with :class:`warp.array` documented as the canonical type. Existing ``torch.Tensor`` callers continue to work without changes. +* **Breaking:** :attr:`~isaaclab.renderers.RenderBufferSpec.dtype` now stores a + Warp scalar dtype (e.g. ``warp.uint8``) rather than a ``torch.dtype``. + Renderer authors that build :class:`RenderBufferSpec` instances must update + ``RenderBufferSpec(channels, torch.uint8)`` to ``RenderBufferSpec(channels, + wp.uint8)`` (and similarly for ``torch.float32``, ``torch.int32``). +* :class:`~isaaclab.sensors.camera.Camera`, + :class:`~isaaclab.sensors.camera.CameraData`, and the three first-party + renderers (:class:`IsaacRtxRenderer`, :class:`NewtonWarpRenderer`, + :class:`OVRTXRenderer`) no longer import or use ``torch`` internally: every + storage allocation goes through :func:`warp.zeros`, scatter / mask / clipping + operations are warp kernels (see + :mod:`~isaaclab.sensors.camera.kernels`), and quaternion convention + conversions are implemented by :func:`isaaclab.sensors.camera.orientation_conventions.convert_quat_array`. diff --git a/source/isaaclab/isaaclab/renderers/output_contract.py b/source/isaaclab/isaaclab/renderers/output_contract.py index bfa3fff41d8b..7b04ae167caf 100644 --- a/source/isaaclab/isaaclab/renderers/output_contract.py +++ b/source/isaaclab/isaaclab/renderers/output_contract.py @@ -14,10 +14,7 @@ from dataclasses import dataclass from enum import StrEnum -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - import torch +from typing import Any class RenderBufferKind(StrEnum): @@ -48,7 +45,8 @@ class RenderBufferSpec: """Per-pixel layout (channels + dtype) for one render buffer kind.""" channels: int - """Number of per-pixel channels (last dimension of the allocated tensor).""" + """Number of per-pixel channels (last dimension of the allocated buffer).""" - dtype: torch.dtype - """Torch dtype the renderer writes for this render buffer kind.""" + dtype: Any + """Warp scalar dtype (e.g. ``warp.uint8``, ``warp.float32``, ``warp.int32``) + the renderer writes for this render buffer kind.""" diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py index 2ba26c672c73..b63f44a044ce 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera.py @@ -7,10 +7,9 @@ import logging from collections.abc import Sequence -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Any, Literal import numpy as np -import torch import warp as wp from pxr import UsdGeom @@ -21,15 +20,12 @@ from isaaclab.renderers import BaseRenderer, CameraRenderSpec from isaaclab.sim.views import FrameView from isaaclab.utils import to_camel_case -from isaaclab.utils.math import ( - convert_camera_frame_orientation_convention, - create_rotation_matrix_from_view, - quat_from_matrix, -) from isaaclab.utils.warp.proxy_array import ProxyArray from ..sensor_base import SensorBase +from . import kernels as camera_kernels from .camera_data import CameraData, RenderBufferKind +from .orientation_conventions import _resolve_conversion_quat, convert_quat_array if TYPE_CHECKING: from .camera_cfg import CameraCfg @@ -38,6 +34,36 @@ logger = logging.getLogger(__name__) +def _to_wp_array(value: Any, device: str, dtype: Any | None = None) -> wp.array: + """Normalize an array-like input to a :class:`warp.array` on ``device``. + + Accepts :class:`warp.array`, :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`, + :class:`numpy.ndarray`, lists/tuples of numbers, and torch tensors (detected + structurally via :func:`warp.from_torch`'s interop — no torch import needed). + + The ``dtype`` argument, if provided, controls the returned ``wp.array`` dtype. + """ + # ProxyArray and wp.array are the canonical fast paths. + if isinstance(value, ProxyArray): + arr = value.warp + if dtype is not None and arr.dtype is not dtype: + arr = wp.array(ptr=arr.ptr, dtype=dtype, shape=arr.shape, device=arr.device, copy=False) + return arr + if isinstance(value, wp.array): + if dtype is not None and value.dtype is not dtype: + value = wp.array(ptr=value.ptr, dtype=dtype, shape=value.shape, device=value.device, copy=False) + return value + # Torch tensors are detected by their module and the presence of ``data_ptr``, + # so this module never imports torch itself. ``wp.from_torch`` lives in warp's + # interop layer. + if type(value).__module__.startswith("torch") and hasattr(value, "data_ptr"): + contiguous = value.contiguous() if hasattr(value, "is_contiguous") and not value.is_contiguous() else value + return wp.from_torch(contiguous, dtype=dtype) if dtype is not None else wp.from_torch(contiguous) + # Numpy / lists / tuples — copy onto the device. + np_value = np.asarray(value) + return wp.array(np_value, dtype=dtype, device=device) + + class Camera(SensorBase): r"""The camera sensor for acquiring visual data. @@ -118,12 +144,22 @@ def __init__(self, cfg: CameraCfg): if renderer_type == "isaac_rtx": get_settings_manager().set_bool("/isaaclab/render/rtx_sensors", True) - # Compute camera orientation (convention conversion) and spawn - rot = torch.tensor(self.cfg.offset.rot, dtype=torch.float32, device="cpu").unsqueeze(0) - rot_offset = convert_camera_frame_orientation_convention( - rot, origin=self.cfg.offset.convention, target="opengl" + # Compute the spawn orientation in OpenGL convention without involving torch: + # the cfg-supplied quaternion is constant per cfg, so we resolve the + # convention conversion as a single quaternion product on the CPU. + rot_xyzw = tuple(float(c) for c in self.cfg.offset.rot) + conv = _resolve_conversion_quat(self.cfg.offset.convention, "opengl") + sx, sy, sz, sw = rot_xyzw + cx, cy, cz, cw = conv + rot_offset = np.array( + [ + sw * cx + sx * cw + sy * cz - sz * cy, + sw * cy - sx * cz + sy * cw + sz * cx, + sw * cz + sx * cy - sy * cx + sz * cw, + sw * cw - sx * cx - sy * cy - sz * cz, + ], + dtype=np.float32, ) - rot_offset = rot_offset.squeeze(0).cpu().numpy() if self.cfg.spawn is not None and self.cfg.spawn.vertical_aperture is None: self.cfg.spawn.vertical_aperture = self.cfg.spawn.horizontal_aperture * self.cfg.height / self.cfg.width self._resolve_and_spawn("camera", translation=self.cfg.offset.pos, orientation=rot_offset) @@ -191,7 +227,7 @@ def image_shape(self) -> tuple[int, int]: def set_intrinsic_matrices( self, - matrices: wp.array | ProxyArray | torch.Tensor | np.ndarray, + matrices: wp.array | ProxyArray | Any, focal_length: float | None = None, env_ids: Sequence[int] | None = None, ): @@ -215,29 +251,26 @@ def set_intrinsic_matrices( matrices: The intrinsic matrices for the camera. Shape is (N, 3, 3). A :class:`warp.array` is the canonical input type; :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`, - :class:`torch.Tensor`, and :class:`numpy.ndarray` are also accepted. + :class:`numpy.ndarray`, and :class:`torch.Tensor` are also accepted. focal_length: Perspective focal length (in cm) used to calculate pixel size. Defaults to None. If None, focal_length will be calculated 1 / width. env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices. """ # resolve env_ids if env_ids is None: - env_ids = self._ALL_INDICES - # normalize input: ProxyArray → wp.array → torch.Tensor → numpy - if isinstance(matrices, ProxyArray): - matrices = matrices.torch - elif isinstance(matrices, wp.array): - matrices = wp.to_torch(matrices) - if isinstance(matrices, torch.Tensor): - matrices = matrices.cpu().numpy() - else: - matrices = np.asarray(matrices, dtype=float) + env_ids = list(range(self._view.count)) + # USD attribute writes are scalar host-side; pull a numpy view of the input. + matrices_wp = _to_wp_array(matrices, device=self._device) + matrices_np = matrices_wp.numpy() # iterate over env_ids - for i, intrinsic_matrix in zip(env_ids, matrices): + for i, intrinsic_matrix in zip(env_ids, matrices_np): height, width = self.image_shape params = sensor_utils.convert_camera_intrinsics_to_usd( - intrinsic_matrix=intrinsic_matrix.reshape(-1), height=height, width=width, focal_length=focal_length + intrinsic_matrix=np.asarray(intrinsic_matrix, dtype=float).reshape(-1), + height=height, + width=width, + focal_length=focal_length, ) # change data for corresponding camera index @@ -262,8 +295,8 @@ def set_intrinsic_matrices( def set_world_poses( self, - positions: wp.array | ProxyArray | torch.Tensor | np.ndarray | None = None, - orientations: wp.array | ProxyArray | torch.Tensor | np.ndarray | None = None, + positions: wp.array | ProxyArray | Any | None = None, + orientations: wp.array | ProxyArray | Any | None = None, env_ids: Sequence[int] | None = None, convention: Literal["opengl", "ros", "world"] = "ros", ): @@ -281,8 +314,8 @@ def set_world_poses( Args: positions: The cartesian coordinates [m]. Shape is (N, 3). A :class:`warp.array` - is the canonical input type; :class:`ProxyArray`, :class:`torch.Tensor`, - and :class:`numpy.ndarray` are also accepted. Defaults to None, in which + is the canonical input type; :class:`ProxyArray`, :class:`numpy.ndarray`, + and :class:`torch.Tensor` are also accepted. Defaults to None, in which case the camera position is not changed. orientations: The quaternion orientation in (x, y, z, w). Shape is (N, 4). Same accepted types as ``positions``. Defaults to None, in which case @@ -293,61 +326,63 @@ def set_world_poses( Raises: RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first. """ - # resolve env_ids - if env_ids is None: - env_ids = self._ALL_INDICES - # normalize positions to a torch tensor on the sensor device + # Position buffer for FrameView: (N,) of wp.vec3. + pos_wp: wp.array | None = None if positions is not None: - positions = self._to_device_tensor(positions) - # convert rotation matrix from input convention to OpenGL + pos_wp = _to_wp_array(positions, device=self._device, dtype=wp.vec3) + + # Orientation buffer for FrameView: (N,) of wp.quatf in the OpenGL convention. + ori_wp: wp.array | None = None if orientations is not None: - orientations = self._to_device_tensor(orientations) - orientations = convert_camera_frame_orientation_convention(orientations, origin=convention, target="opengl") - # convert torch tensors to warp arrays for the view - pos_wp = wp.from_torch(positions.contiguous()) if positions is not None else None - ori_wp = wp.from_torch(orientations.contiguous()) if orientations is not None else None - if env_ids is not None: - if not isinstance(env_ids, torch.Tensor): - env_ids = torch.tensor(env_ids, dtype=torch.int32, device=self._device) - idx_wp = wp.from_torch(env_ids.to(dtype=torch.int32), dtype=wp.int32) - else: - idx_wp = None + src_quat = _to_wp_array(orientations, device=self._device, dtype=wp.quatf) + ori_wp = ( + convert_quat_array(src_quat, origin=convention, target="opengl") if convention != "opengl" else src_quat + ) + + idx_wp = self._resolve_env_indices(env_ids) self._view.set_world_poses(pos_wp, ori_wp, idx_wp) def set_world_poses_from_view( self, - eyes: wp.array | ProxyArray | torch.Tensor | np.ndarray, - targets: wp.array | ProxyArray | torch.Tensor | np.ndarray, + eyes: wp.array | ProxyArray | Any, + targets: wp.array | ProxyArray | Any, env_ids: Sequence[int] | None = None, ): """Set the poses of the camera from the eye position and look-at target position. Args: - eyes: The positions of the camera's eye. Shape is (N, 3). A :class:`warp.array` - is the canonical input type; :class:`ProxyArray`, :class:`torch.Tensor`, - and :class:`numpy.ndarray` are also accepted. - targets: The target locations to look at. Shape is (N, 3). Same accepted types - as ``eyes``. + eyes: The positions of the camera's eye [m]. Shape is (N, 3). A + :class:`warp.array` is the canonical input type; + :class:`ProxyArray`, :class:`numpy.ndarray`, and :class:`torch.Tensor` + are also accepted. + targets: The target locations to look at [m]. Shape is (N, 3). Same + accepted types as ``eyes``. env_ids: A sensor ids to manipulate. Defaults to None, which means all sensor indices. Raises: RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first. NotImplementedError: If the stage up-axis is not "Y" or "Z". """ - # resolve env_ids - if env_ids is None: - env_ids = self._ALL_INDICES - # normalize inputs to torch tensors on the sensor device - eyes = self._to_device_tensor(eyes) - targets = self._to_device_tensor(targets) - # get up axis of current stage + eyes_wp = _to_wp_array(eyes, device=self._device, dtype=wp.vec3) + targets_wp = _to_wp_array(targets, device=self._device, dtype=wp.vec3) up_axis = UsdGeom.GetStageUpAxis(self.stage) - # set camera poses using the view - orientations = quat_from_matrix(create_rotation_matrix_from_view(eyes, targets, up_axis, device=self._device)) - if not isinstance(env_ids, torch.Tensor): - env_ids = torch.tensor(env_ids, dtype=torch.int32, device=self._device) - idx_wp = wp.from_torch(env_ids.to(dtype=torch.int32), dtype=wp.int32) - self._view.set_world_poses(wp.from_torch(eyes.contiguous()), wp.from_torch(orientations.contiguous()), idx_wp) + if up_axis == "Y": + up_vec = wp.vec3(0.0, 1.0, 0.0) + elif up_axis == "Z": + up_vec = wp.vec3(0.0, 0.0, 1.0) + else: + raise NotImplementedError(f"Stage up axis {up_axis!r} is not supported (expected 'Y' or 'Z').") + n = int(eyes_wp.shape[0]) + ori_wp = wp.empty(n, dtype=wp.quatf, device=self._device) + wp.launch( + kernel=camera_kernels.look_at_quat_kernel, + dim=n, + inputs=[eyes_wp, targets_wp, up_vec], + outputs=[ori_wp], + device=self._device, + ) + idx_wp = self._resolve_env_indices(env_ids) + self._view.set_world_poses(eyes_wp, ori_wp, idx_wp) """ Operations @@ -360,16 +395,19 @@ def reset(self, env_ids: Sequence[int] | None = None, env_mask: wp.array | None ) # reset the timestamps super().reset(env_ids, env_mask) - # resolve to indices for torch indexing - if env_ids is None and env_mask is not None: - env_ids = wp.to_torch(env_mask).nonzero(as_tuple=False).squeeze(-1) - elif env_ids is None: - env_ids = self._ALL_INDICES - # reset the data + # Resolve to a wp.int32 array of indices and a wp.bool mask covering them. + env_ids_wp, env_mask_local = self._resolve_env_ids_or_mask(env_ids, env_mask) + if env_ids_wp.shape[0] == 0: + return # note: this recomputation is useful if one performs events such as randomizations on the camera poses. - self._update_poses(env_ids) - # Reset the frame count (ProxyArray; mutate via the zero-copy torch view) - self._frame.torch[env_ids] = 0 + self._update_poses_wp(env_ids_wp) + # Reset the frame count via a warp scatter kernel. + wp.launch( + camera_kernels.masked_set_int64_kernel, + dim=self._view.count, + inputs=[env_mask_local, wp.int64(0), self._frame.warp], + device=self._device, + ) """ Implementation. @@ -410,12 +448,13 @@ def _initialize_impl(self): f" the number of environments ({self._num_envs})." ) - # Create all env_ids buffer - self._ALL_INDICES = torch.arange(self._view.count, device=self._device, dtype=torch.long) - # Create frame count buffer (wp.array primary, ProxyArray-published). - self._frame = ProxyArray( - wp.from_torch(torch.zeros(self._view.count, device=self._device, dtype=torch.long).contiguous()) - ) + # ALL_INDICES: wp.int32 array (idiomatic for indexing into other wp.arrays). + self._ALL_INDICES = wp.array(np.arange(self._view.count, dtype=np.int32), device=self._device) + # Frame count buffer (wp.array primary, ProxyArray-published). + self._frame = ProxyArray(wp.zeros(self._view.count, dtype=wp.int64, device=self._device)) + # Pre-allocate scratch buffer for env_mask -> indices conversion. + self._env_indices_scratch = wp.empty(self._view.count, dtype=wp.int32, device=self._device) + self._env_indices_count = wp.zeros(1, dtype=wp.int32, device=self._device) # Convert all encapsulated prims to Camera for cam_prim in self._view.prims: @@ -448,14 +487,20 @@ def _initialize_impl(self): self._create_buffers() def _update_buffers_impl(self, env_mask: wp.array): - env_ids = wp.to_torch(env_mask).nonzero(as_tuple=False).squeeze(-1) - if len(env_ids) == 0: + # Mask -> compact env_ids array via warp atomic scan (no torch involvement). + env_ids_wp, count = self._mask_to_env_ids(env_mask) + if count == 0: return - # Increment frame count (ProxyArray; mutate via the zero-copy torch view) - self._frame.torch[env_ids] += 1 + # Increment frame count via masked warp kernel. + wp.launch( + camera_kernels.masked_increment_int64_kernel, + dim=self._view.count, + inputs=[env_mask, self._frame.warp], + device=self._device, + ) # update latest camera pose if requested if self.cfg.update_latest_camera_pose: - self._update_poses(env_ids) + self._update_poses_wp(env_ids_wp) sim_ctx = sim_utils.SimulationContext.instance() renderer = self._renderer @@ -475,27 +520,10 @@ def _update_buffers_impl(self, env_mask: wp.array): Private Helpers """ - def _to_device_tensor(self, value: wp.array | ProxyArray | torch.Tensor | np.ndarray) -> torch.Tensor: - """Normalize an array-like input to a ``torch.Tensor`` on this sensor's device. - - Accepts :class:`warp.array`, :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`, - :class:`torch.Tensor`, and :class:`numpy.ndarray`. Warp inputs are unwrapped via - ``wp.to_torch`` (zero-copy). - """ - if isinstance(value, ProxyArray): - value = value.torch - elif isinstance(value, wp.array): - value = wp.to_torch(value) - if isinstance(value, np.ndarray): - return torch.from_numpy(value).to(device=self._device) - if not isinstance(value, torch.Tensor): - return torch.tensor(value, device=self._device) - return value.to(device=self._device) - def _check_supported_data_types(self, cfg: CameraCfg): """Checks if the data types are supported by the ray-caster camera.""" # check if there is any intersection in unsupported types - # reason: these use np structured data types which we can't yet convert to torch tensor + # reason: these use np structured data types which we can't yet convert to a warp buffer common_elements = set(cfg.data_types) & Camera.UNSUPPORTED_TYPES if common_elements: # provide alternative fast counterparts @@ -507,13 +535,18 @@ def _check_supported_data_types(self, cfg: CameraCfg): raise ValueError( f"Camera class does not support the following sensor types: {common_elements}." "\n\tThis is because these sensor types output numpy structured data types which" - "can't be converted to torch tensors easily." + " can't be converted to a warp buffer easily." "\n\tHint: If you need to work with these sensor types, we recommend using their fast counterparts." f"\n\t\tFast counterparts: {fast_common_elements}" ) def _create_buffers(self): - """Create buffers for storing data.""" + """Allocate the camera-frame state buffers (pure warp). + + Each :class:`CameraData` field stores a ``wp.array`` primary buffer and is + published as a :class:`ProxyArray`. Renderers receive the underlying + :class:`warp.array` references via :meth:`set_outputs`. + """ specs = self._renderer.supported_output_types() # Split requested names into known/unsupported; warn once for any the renderer can't produce. known: list[str] = [] @@ -532,91 +565,104 @@ def _create_buffers(self): type(self._renderer).__name__, unsupported, ) + device_str = str(self._device) self._data = CameraData.allocate( data_types=known, height=self.cfg.height, width=self.cfg.width, num_views=self._view.count, - device=self._device, + device=device_str, supported_specs=specs, ) # Camera-frame state (pose / intrinsics) is owned by the camera, not - # the renderer: populate it on the freshly constructed ``CameraData``. - # Each field stores a wp.array primary buffer (allocated via wp.from_torch - # over a torch.zeros allocation) and is published as a ProxyArray. - self._data.intrinsic_matrices = ProxyArray( - wp.from_torch(torch.zeros((self._view.count, 3, 3), device=self._device).contiguous()) - ) - self._update_intrinsic_matrices(self._ALL_INDICES) - self._data.pos_w = ProxyArray( - wp.from_torch(torch.zeros((self._view.count, 3), device=self._device).contiguous()) - ) - self._data.quat_w_world = ProxyArray( - wp.from_torch(torch.zeros((self._view.count, 4), device=self._device).contiguous()) - ) - self._update_poses(self._ALL_INDICES) + # the renderer: populate it on the freshly constructed CameraData. + n = self._view.count + self._data.intrinsic_matrices = ProxyArray(wp.zeros((n, 3, 3), dtype=wp.float32, device=device_str)) + self._update_intrinsic_matrices(list(range(n))) + self._data.pos_w = ProxyArray(wp.zeros(n, dtype=wp.vec3, device=device_str)) + self._data.quat_w_world = ProxyArray(wp.zeros(n, dtype=wp.quatf, device=device_str)) + self._update_poses_wp(self._ALL_INDICES) # Hand the renderer the underlying wp.array buffers; the ProxyArray # wrappers in CameraData stay valid since both views share memory. self._renderer.set_outputs(self._render_data, {name: proxy.warp for name, proxy in self._data.output.items()}) def _update_intrinsic_matrices(self, env_ids: Sequence[int]): - """Compute camera's matrix of intrinsic parameters. - - Also called calibration matrix. This matrix works for linear depth images. We assume square pixels. + """Compute camera's matrix of intrinsic parameters via a warp scatter kernel. - .. note:: - The calibration matrix projects points in the 3D scene onto an imaginary screen of the camera. - The coordinates of points on the image plane are in the homogeneous representation. + The per-camera scalars are read from the USD prims (CPU-bound) and + scattered into the GPU buffer in a single launch. """ - # zero-copy torch view of the wp.array buffer; in-place mutation is - # visible through both views. - intrinsic_view = self._data.intrinsic_matrices.torch - # iterate over all cameras - for i in env_ids: - # Get corresponding sensor prim - sensor_prim = self._sensor_prims[i] - # get camera parameters + if len(env_ids) == 0: + return + height, width = self.image_shape + fx_host = np.empty(len(env_ids), dtype=np.float32) + fy_host = np.empty(len(env_ids), dtype=np.float32) + cx_host = np.empty(len(env_ids), dtype=np.float32) + cy_host = np.empty(len(env_ids), dtype=np.float32) + idx_host = np.asarray(list(env_ids), dtype=np.int32) + for k, i in enumerate(env_ids): + sensor_prim = self._sensor_prims[int(i)] # currently rendering does not use aperture offsets or vertical aperture focal_length = sensor_prim.GetFocalLengthAttr().Get() horiz_aperture = sensor_prim.GetHorizontalApertureAttr().Get() - - # get viewport parameters - height, width = self.image_shape - # extract intrinsic parameters f_x = (width * focal_length) / horiz_aperture - f_y = f_x - c_x = width * 0.5 - c_y = height * 0.5 - # create intrinsic matrix for depth linear - intrinsic_view[i, 0, 0] = f_x - intrinsic_view[i, 0, 2] = c_x - intrinsic_view[i, 1, 1] = f_y - intrinsic_view[i, 1, 2] = c_y - intrinsic_view[i, 2, 2] = 1 - - def _update_poses(self, env_ids: Sequence[int]): - """Computes the pose of the camera in the world frame with ROS convention. - - This methods uses the ROS convention to resolve the input pose. In this convention, - we assume that the camera front-axis is +Z-axis and up-axis is -Y-axis. - - Returns: - A tuple of the position (in meters) and quaternion (x, y, z, w). + fx_host[k] = f_x + fy_host[k] = f_x + cx_host[k] = width * 0.5 + cy_host[k] = height * 0.5 + device_str = str(self._device) + wp.launch( + camera_kernels.write_intrinsic_matrices_kernel, + dim=len(env_ids), + inputs=[ + wp.array(idx_host, dtype=wp.int32, device=device_str), + wp.array(fx_host, dtype=wp.float32, device=device_str), + wp.array(fy_host, dtype=wp.float32, device=device_str), + wp.array(cx_host, dtype=wp.float32, device=device_str), + wp.array(cy_host, dtype=wp.float32, device=device_str), + self._data.intrinsic_matrices.warp, + ], + device=device_str, + ) + + def _update_poses_wp(self, env_ids_wp: wp.array): + """Pull camera poses from the view and write them into ``self._data`` (pure warp). + + Args: + env_ids_wp: ``wp.int32`` array of camera indices to refresh, or + :attr:`_ALL_INDICES` to refresh every camera. """ - # check camera prim exists if len(self._sensor_prims) == 0: raise RuntimeError("Camera prim is None. Please call 'sim.play()' first.") - # get the poses from the view (returns ProxyArray, use .torch for tensor access) - if env_ids is not None and not isinstance(env_ids, torch.Tensor): - env_ids = torch.tensor(env_ids, dtype=torch.int32, device=self._device) - indices = wp.from_torch(env_ids.to(dtype=torch.int32), dtype=wp.int32) if env_ids is not None else None - pos_w, quat_w = self._view.get_world_poses(indices) - # Mutate via the zero-copy torch view to avoid the ProxyArray.__setitem__ - # deprecation warning (the view shares memory with the wp.array primary). - self._data.pos_w.torch[env_ids] = pos_w.torch - self._data.quat_w_world.torch[env_ids] = convert_camera_frame_orientation_convention( - quat_w.torch, origin="opengl", target="world" + pos_w_proxy, quat_w_proxy = self._view.get_world_poses(env_ids_wp) + # Convert quat_w (opengl) -> world via a warp kernel and scatter into CameraData. + n_pose = int(env_ids_wp.shape[0]) + quat_w_world_local = convert_quat_array(quat_w_proxy.warp, origin="opengl", target="world") + # Build a temporary wp.bool mask covering env_ids_wp so we can scatter through + # the existing masked_set kernels. + mask_local = wp.zeros(self._view.count, dtype=wp.bool, device=self._device) + wp.launch( + camera_kernels.indices_to_mask_kernel, + dim=n_pose, + inputs=[env_ids_wp, mask_local], + device=self._device, + ) + # Scatter pos and quat into self._data via masked copies. + # Note: the view's pos/quat arrays are already the same length as env_ids_wp, + # so we copy them into a full-sized scratch first via index, then mask-copy. + # For correctness, we instead scatter directly from the (M,) arrays using an indexed kernel. + wp.launch( + camera_kernels.scatter_vec3f_kernel, + dim=n_pose, + inputs=[env_ids_wp, pos_w_proxy.warp, self._data.pos_w.warp], + device=self._device, + ) + wp.launch( + camera_kernels.scatter_quatf_kernel, + dim=n_pose, + inputs=[env_ids_wp, quat_w_world_local, self._data.quat_w_world.warp], + device=self._device, ) # notify renderer of updated poses (guarded in case called before initialization completes) if self._render_data is not None: @@ -627,6 +673,76 @@ def _update_poses(self, env_ids: Sequence[int]): self._data.intrinsic_matrices.warp, ) + # ---- env id / mask helpers ------------------------------------------------------------------ + + def _resolve_env_indices(self, env_ids: Sequence[int] | wp.array | ProxyArray | None) -> wp.array | None: + """Resolve user-supplied env_ids to a contiguous ``wp.int32`` array (or ``None``). + + ``None`` is passed through so callers can short-circuit to the "all" branch. + """ + if env_ids is None: + return None + if isinstance(env_ids, ProxyArray): + env_ids = env_ids.warp + if isinstance(env_ids, wp.array): + if env_ids.dtype is wp.int32: + return env_ids + return wp.array(env_ids.numpy().astype(np.int32, copy=False), dtype=wp.int32, device=self._device) + # Sequence of ints / numpy-array. + return wp.array(np.asarray(list(env_ids), dtype=np.int32), dtype=wp.int32, device=self._device) + + def _resolve_env_ids_or_mask( + self, + env_ids: Sequence[int] | wp.array | ProxyArray | None, + env_mask: wp.array | None, + ) -> tuple[wp.array, wp.array]: + """Return both an int32 indices array and a bool mask covering the same envs.""" + device_str = str(self._device) + if env_mask is not None and env_ids is None: + ids, _ = self._mask_to_env_ids(env_mask) + return ids, env_mask + if env_ids is None: + mask = wp.array(np.ones(self._view.count, dtype=np.bool_), dtype=wp.bool, device=device_str) + return self._ALL_INDICES, mask + ids = self._resolve_env_indices(env_ids) + mask = wp.zeros(self._view.count, dtype=wp.bool, device=device_str) + wp.launch( + camera_kernels.indices_to_mask_kernel, + dim=int(ids.shape[0]), + inputs=[ids, mask], + device=device_str, + ) + return ids, mask + + def _mask_to_env_ids(self, env_mask: wp.array) -> tuple[wp.array, int]: + """Compact a ``wp.bool`` mask into a contiguous ``wp.int32`` indices array. + + Returns ``(indices, count)`` where ``indices`` is a freshly-allocated + slice over the populated prefix of the per-step scratch buffer. + """ + device_str = str(self._device) + # Reset counter; reuse the persistent scratch buffer. + self._env_indices_count.zero_() + wp.launch( + camera_kernels.mask_to_indices_kernel, + dim=int(env_mask.shape[0]), + inputs=[env_mask, self._env_indices_scratch, self._env_indices_count], + device=device_str, + ) + count = int(self._env_indices_count.numpy()[0]) + if count == 0: + return wp.empty(0, dtype=wp.int32, device=device_str), 0 + # Sub-view over the populated prefix; non-owning so the caller mustn't + # mutate the scratch buffer between calls. + sub = wp.array( + ptr=self._env_indices_scratch.ptr, + dtype=wp.int32, + shape=(count,), + device=self._env_indices_scratch.device, + copy=False, + ) + return sub, count + """ Internal simulation callbacks. """ diff --git a/source/isaaclab/isaaclab/sensors/camera/camera_data.py b/source/isaaclab/isaaclab/sensors/camera/camera_data.py index 40595e40d1c5..01eb7bacb44e 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera_data.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera_data.py @@ -8,20 +8,15 @@ from dataclasses import dataclass from typing import Any -import torch import warp as wp # Re-exported as part of the public isaaclab.sensors.camera API from isaaclab.renderers.output_contract import RenderBufferKind, RenderBufferSpec -from isaaclab.utils.math import convert_camera_frame_orientation_convention from isaaclab.utils.warp.proxy_array import ProxyArray -__all__ = ["CameraData", "RenderBufferKind", "RenderBufferSpec"] - +from .orientation_conventions import convert_quat_array -def _wrap(buf: torch.Tensor) -> ProxyArray: - """Wrap a torch buffer as a :class:`ProxyArray` with a zero-copy ``wp.array`` view.""" - return ProxyArray(wp.from_torch(buf)) +__all__ = ["CameraData", "RenderBufferKind", "RenderBufferSpec"] @dataclass @@ -89,16 +84,21 @@ def allocate( height: int, width: int, num_views: int, - device: torch.device | str, + device: str, supported_specs: dict[RenderBufferKind, RenderBufferSpec], ) -> CameraData: """Build a :class:`CameraData` with output buffers pre-allocated. - Allocates one ``(num_views, height, width, channels)`` buffer per kind - in the intersection of ``data_types`` and ``supported_specs``, using - the channels and dtype from each :class:`RenderBufferSpec`. Each buffer - is exposed as a :class:`ProxyArray` (``wp.array`` primary, zero-copy - ``torch.Tensor`` view via ``.torch``). + Allocates one ``(num_views, height, width, channels)`` :class:`warp.array` + per kind in the intersection of ``data_types`` and ``supported_specs``, + using the channels and dtype from each :class:`RenderBufferSpec`. Each + buffer is exposed as a :class:`ProxyArray` (``wp.array`` primary, + zero-copy ``torch.Tensor`` view via ``.torch``). + + When the renderer publishes both ``"rgb"`` and ``"rgba"`` and the user + requests either, only the ``"rgba"`` buffer is allocated; the ``"rgb"`` + entry is a non-owning :class:`warp.array` view over the first three + channels of the same memory. Args: data_types: Requested output names (typically :attr:`CameraCfg.data_types`). @@ -106,7 +106,7 @@ def allocate( height: Image height in pixels. width: Image width in pixels. num_views: Number of camera views (batch dimension). - device: Torch device on which to allocate the buffers. + device: Warp device string on which to allocate the buffers. supported_specs: Per-buffer layout the active renderer can produce, keyed by :class:`RenderBufferKind`. Names absent from this mapping are not allocated. @@ -128,8 +128,9 @@ def allocate( unknown.append(name) if unknown: raise ValueError(f"Unknown RenderBufferKind name(s): {unknown}. Expected members of RenderBufferKind.") - # rgb is exposed as a view into rgba when the renderer publishes both, - # so requesting either one allocates the shared rgba buffer. + # rgb is exposed as a non-owning wp.array view over the rgba buffer when + # the renderer publishes both, so requesting either one allocates the + # shared rgba buffer. rgb_alias = ( RenderBufferKind.RGBA in supported_specs and RenderBufferKind.RGB in supported_specs @@ -138,28 +139,32 @@ def allocate( if rgb_alias: requested.update({RenderBufferKind.RGB, RenderBufferKind.RGBA}) - # Allocate the underlying torch buffers first so we can preserve the - # rgb-as-view-of-rgba alias as a torch slice (shared memory). Each - # buffer is then wrapped as a ProxyArray over a wp.from_torch view — - # the wp.array and torch.Tensor share the same allocation. - torch_buffers: dict[str, torch.Tensor] = {} + device_str = str(device) + buffers: dict[str, ProxyArray] = {} for name, spec in supported_specs.items(): if name not in requested: continue if rgb_alias and name == RenderBufferKind.RGB: continue - torch_buffers[str(name)] = torch.zeros( - (num_views, height, width, spec.channels), - dtype=spec.dtype, - device=device, - ).contiguous() + arr = wp.zeros((num_views, height, width, spec.channels), dtype=spec.dtype, device=device_str) + buffers[str(name)] = ProxyArray(arr) if rgb_alias: - # Share storage with rgba: the slice is a non-contiguous torch view. - # ``wp.from_torch`` accepts strided views, so the resulting ProxyArray - # tracks the same underlying memory as the rgba buffer. - torch_buffers[str(RenderBufferKind.RGB)] = torch_buffers[str(RenderBufferKind.RGBA)][..., :3] - - buffers: dict[str, ProxyArray] = {name: _wrap(buf) for name, buf in torch_buffers.items()} + rgba_arr = buffers[str(RenderBufferKind.RGBA)].warp + rgba_spec = supported_specs[RenderBufferKind.RGBA] + rgb_channels = supported_specs[RenderBufferKind.RGB].channels + # Construct a non-owning wp.array view that shares storage with rgba + # but reports the rgb channel count on the last axis. Strides are + # inherited from the contiguous rgba allocation, leaving the rgb + # view non-contiguous (last axis stride > element size). + rgb_view = wp.array( + ptr=rgba_arr.ptr, + dtype=rgba_spec.dtype, + shape=(num_views, height, width, rgb_channels), + strides=rgba_arr.strides, + device=rgba_arr.device, + copy=False, + ) + buffers[str(RenderBufferKind.RGB)] = ProxyArray(rgb_view) return cls( image_shape=(height, width), @@ -180,8 +185,7 @@ def quat_w_ros(self) -> ProxyArray: Shape is (N, 4) where N is the number of sensors. """ - converted = convert_camera_frame_orientation_convention(self.quat_w_world.torch, origin="world", target="ros") - return _wrap(converted.contiguous()) + return ProxyArray(convert_quat_array(self.quat_w_world.warp, origin="world", target="ros")) @property def quat_w_opengl(self) -> ProxyArray: @@ -193,7 +197,4 @@ def quat_w_opengl(self) -> ProxyArray: Shape is (N, 4) where N is the number of sensors. """ - converted = convert_camera_frame_orientation_convention( - self.quat_w_world.torch, origin="world", target="opengl" - ) - return _wrap(converted.contiguous()) + return ProxyArray(convert_quat_array(self.quat_w_world.warp, origin="world", target="opengl")) diff --git a/source/isaaclab/isaaclab/sensors/camera/kernels.py b/source/isaaclab/isaaclab/sensors/camera/kernels.py new file mode 100644 index 000000000000..4a94fff0ea66 --- /dev/null +++ b/source/isaaclab/isaaclab/sensors/camera/kernels.py @@ -0,0 +1,318 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp kernels backing the standard :class:`~isaaclab.sensors.camera.Camera`. + +Each kernel keeps the camera's runtime fully Warp-native: there is no torch +involvement in the per-step pose / intrinsic / frame-counter updates. +""" + +from __future__ import annotations + +import warp as wp + + +@wp.kernel +def masked_increment_int64_kernel( + mask: wp.array(dtype=wp.bool), + counter: wp.array(dtype=wp.int64), +): + """Increment ``counter[i]`` by one for every ``i`` where ``mask[i]`` is true.""" + tid = wp.tid() + if mask[tid]: + counter[tid] = counter[tid] + wp.int64(1) + + +@wp.kernel +def masked_set_int64_kernel( + mask: wp.array(dtype=wp.bool), + value: wp.int64, + target: wp.array(dtype=wp.int64), +): + """Overwrite ``target[i]`` with ``value`` for every ``i`` where ``mask[i]`` is true.""" + tid = wp.tid() + if mask[tid]: + target[tid] = value + + +@wp.kernel +def masked_set_vec3f_kernel( + mask: wp.array(dtype=wp.bool), + src: wp.array(dtype=wp.vec3f), + dst: wp.array(dtype=wp.vec3f), +): + """For every ``i`` where ``mask[i]`` is true, assign ``dst[i] = src[i]``.""" + tid = wp.tid() + if mask[tid]: + dst[tid] = src[tid] + + +@wp.kernel +def masked_set_quatf_kernel( + mask: wp.array(dtype=wp.bool), + src: wp.array(dtype=wp.quatf), + dst: wp.array(dtype=wp.quatf), +): + """For every ``i`` where ``mask[i]`` is true, assign ``dst[i] = src[i]``.""" + tid = wp.tid() + if mask[tid]: + dst[tid] = src[tid] + + +@wp.kernel +def write_intrinsic_matrices_kernel( + env_ids: wp.array(dtype=wp.int32), + fx: wp.array(dtype=wp.float32), + fy: wp.array(dtype=wp.float32), + cx: wp.array(dtype=wp.float32), + cy: wp.array(dtype=wp.float32), + target: wp.array(dtype=wp.float32, ndim=3), +): + """Scatter intrinsic-matrix scalars (``f_x``, ``f_y``, ``c_x``, ``c_y``, 1.0) + into ``target[env_ids[t], :, :]`` for ``t`` in ``[0, env_ids.shape[0])``. + """ + tid = wp.tid() + i = env_ids[tid] + target[i, 0, 0] = fx[tid] + target[i, 0, 2] = cx[tid] + target[i, 1, 1] = fy[tid] + target[i, 1, 2] = cy[tid] + target[i, 2, 2] = wp.float32(1.0) + + +@wp.kernel +def vec3f_view_to_torch_layout_kernel( + src: wp.array(dtype=wp.vec3f), + dst: wp.array(dtype=wp.float32, ndim=2), +): + """Copy from a ``(N,)`` array of ``wp.vec3f`` into a ``(N, 3)`` float32 array.""" + tid = wp.tid() + v = src[tid] + dst[tid, 0] = v[0] + dst[tid, 1] = v[1] + dst[tid, 2] = v[2] + + +@wp.kernel +def quatf_view_to_torch_layout_kernel( + src: wp.array(dtype=wp.quatf), + dst: wp.array(dtype=wp.float32, ndim=2), +): + """Copy from a ``(N,)`` array of ``wp.quatf`` into a ``(N, 4)`` float32 array.""" + tid = wp.tid() + q = src[tid] + dst[tid, 0] = q[0] + dst[tid, 1] = q[1] + dst[tid, 2] = q[2] + dst[tid, 3] = q[3] + + +@wp.kernel +def torch_layout_to_vec3f_kernel( + src: wp.array(dtype=wp.float32, ndim=2), + dst: wp.array(dtype=wp.vec3f), +): + """Copy from a ``(N, 3)`` float32 array into a ``(N,)`` ``wp.vec3f`` array.""" + tid = wp.tid() + dst[tid] = wp.vec3f(src[tid, 0], src[tid, 1], src[tid, 2]) + + +@wp.kernel +def torch_layout_to_quatf_kernel( + src: wp.array(dtype=wp.float32, ndim=2), + dst: wp.array(dtype=wp.quatf), +): + """Copy from a ``(N, 4)`` float32 array into a ``(N,)`` ``wp.quatf`` array.""" + tid = wp.tid() + dst[tid] = wp.quatf(src[tid, 0], src[tid, 1], src[tid, 2], src[tid, 3]) + + +@wp.kernel +def mask_to_indices_kernel( + mask: wp.array(dtype=wp.bool), + indices: wp.array(dtype=wp.int32), + counter: wp.array(dtype=wp.int32), +): + """Compact the indices where ``mask[i]`` is true into ``indices[0..counter[0])``. + + ``counter`` must be zero-initialized before launch. ``indices`` must have + capacity at least ``mask.shape[0]``. + """ + tid = wp.tid() + if mask[tid]: + idx = wp.atomic_add(counter, 0, 1) + indices[idx] = wp.int32(tid) + + +@wp.kernel +def indices_to_mask_kernel( + indices: wp.array(dtype=wp.int32), + mask: wp.array(dtype=wp.bool), +): + """For each ``i`` in ``indices``, set ``mask[indices[i]] = True``. + + ``mask`` must be zero-initialized before launch. + """ + tid = wp.tid() + mask[indices[tid]] = True + + +@wp.kernel +def scatter_vec3f_kernel( + indices: wp.array(dtype=wp.int32), + src: wp.array(dtype=wp.vec3), + dst: wp.array(dtype=wp.vec3), +): + """``dst[indices[t]] = src[t]`` for ``t`` in ``[0, indices.shape[0])``.""" + tid = wp.tid() + dst[indices[tid]] = src[tid] + + +@wp.kernel +def scatter_quatf_kernel( + indices: wp.array(dtype=wp.int32), + src: wp.array(dtype=wp.quatf), + dst: wp.array(dtype=wp.quatf), +): + """``dst[indices[t]] = src[t]`` for ``t`` in ``[0, indices.shape[0])``.""" + tid = wp.tid() + dst[indices[tid]] = src[tid] + + +@wp.kernel +def combine_frame_transforms_kernel( + p1: wp.array(dtype=wp.vec3), + q1: wp.array(dtype=wp.quatf), + p2: wp.array(dtype=wp.vec3), + q2: wp.array(dtype=wp.quatf), + out_p: wp.array(dtype=wp.vec3), + out_q: wp.array(dtype=wp.quatf), +): + """Compose two rigid transforms: ``out = T1 ∘ T2`` (parent ∘ child). + + Mirrors :func:`isaaclab.utils.math.combine_frame_transforms`. + """ + tid = wp.tid() + out_p[tid] = p1[tid] + wp.quat_rotate(q1[tid], p2[tid]) + out_q[tid] = q1[tid] * q2[tid] + + +@wp.kernel +def gather_vec3f_kernel( + indices: wp.array(dtype=wp.int32), + src: wp.array(dtype=wp.vec3), + dst: wp.array(dtype=wp.vec3), +): + """``dst[t] = src[indices[t]]`` for ``t`` in ``[0, indices.shape[0])``.""" + tid = wp.tid() + dst[tid] = src[indices[tid]] + + +@wp.kernel +def gather_quatf_kernel( + indices: wp.array(dtype=wp.int32), + src: wp.array(dtype=wp.quatf), + dst: wp.array(dtype=wp.quatf), +): + """``dst[t] = src[indices[t]]`` for ``t`` in ``[0, indices.shape[0])``.""" + tid = wp.tid() + dst[tid] = src[indices[tid]] + + +@wp.kernel +def quat_inverse_apply_kernel( + q: wp.array(dtype=wp.quatf), + v: wp.array(dtype=wp.vec3), + out: wp.array(dtype=wp.vec3), +): + """``out[i] = quat_rotate(quat_inverse(q[i]), v[i])`` — rotate ``v`` by the inverse of ``q``.""" + tid = wp.tid() + out[tid] = wp.quat_rotate_inv(q[tid], v[tid]) + + +@wp.kernel +def quat_inverse_multiply_kernel( + q1: wp.array(dtype=wp.quatf), + q2: wp.array(dtype=wp.quatf), + out: wp.array(dtype=wp.quatf), +): + """``out[i] = quat_inverse(q1[i]) * q2[i]``.""" + tid = wp.tid() + out[tid] = wp.quat_inverse(q1[tid]) * q2[tid] + + +@wp.kernel +def vec3_subtract_kernel( + a: wp.array(dtype=wp.vec3), + b: wp.array(dtype=wp.vec3), + out: wp.array(dtype=wp.vec3), +): + """``out[i] = a[i] - b[i]``.""" + tid = wp.tid() + out[tid] = a[tid] - b[tid] + + +@wp.kernel +def look_at_quat_kernel( + eyes: wp.array(dtype=wp.vec3), + targets: wp.array(dtype=wp.vec3), + up_world: wp.vec3, + out: wp.array(dtype=wp.quatf), +): + """For each camera ``i``, compute the OpenGL-convention look-at quaternion that + points the camera from ``eyes[i]`` toward ``targets[i]`` with the supplied + world up-axis. The result is stored in ``out[i]`` as ``wp.quatf`` (x, y, z, w). + + Mirrors :func:`isaaclab.utils.math.create_rotation_matrix_from_view` followed by + :func:`isaaclab.utils.math.quat_from_matrix` so external behavior is preserved + while the implementation remains pure warp. + """ + tid = wp.tid() + # OpenGL camera convention: forward = -Z, up = +Y, right = +X. + forward = wp.normalize(targets[tid] - eyes[tid]) + right = wp.normalize(wp.cross(forward, up_world)) + cam_up = wp.cross(right, forward) + # Rotation matrix columns: (right, up, -forward) + m00 = right[0] + m01 = cam_up[0] + m02 = -forward[0] + m10 = right[1] + m11 = cam_up[1] + m12 = -forward[1] + m20 = right[2] + m21 = cam_up[2] + m22 = -forward[2] + # Convert to quaternion (xyzw) using the pytorch3d-style algorithm. + # 4 candidate q_abs^2 components: (4w^2, 4x^2, 4y^2, 4z^2). + qaw = wp.sqrt(wp.max(0.0, 1.0 + m00 + m11 + m22)) + qax = wp.sqrt(wp.max(0.0, 1.0 + m00 - m11 - m22)) + qay = wp.sqrt(wp.max(0.0, 1.0 - m00 + m11 - m22)) + qaz = wp.sqrt(wp.max(0.0, 1.0 - m00 - m11 + m22)) + # Pick the largest one and recover signed components from off-diagonal entries. + if qaw >= qax and qaw >= qay and qaw >= qaz: + denom = 2.0 * wp.max(qaw, 0.1) + qx = (m21 - m12) / denom + qy = (m02 - m20) / denom + qz = (m10 - m01) / denom + qw = qaw * qaw / denom + elif qax >= qay and qax >= qaz: + denom = 2.0 * wp.max(qax, 0.1) + qx = qax * qax / denom + qy = (m10 + m01) / denom + qz = (m02 + m20) / denom + qw = (m21 - m12) / denom + elif qay >= qaz: + denom = 2.0 * wp.max(qay, 0.1) + qx = (m10 + m01) / denom + qy = qay * qay / denom + qz = (m12 + m21) / denom + qw = (m02 - m20) / denom + else: + denom = 2.0 * wp.max(qaz, 0.1) + qx = (m20 + m02) / denom + qy = (m21 + m12) / denom + qz = qaz * qaz / denom + qw = (m10 - m01) / denom + out[tid] = wp.quatf(qx, qy, qz, qw) diff --git a/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py b/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py new file mode 100644 index 000000000000..b9ed24778e98 --- /dev/null +++ b/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py @@ -0,0 +1,240 @@ +# Copyright (c) 2022-2026, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md). +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +"""Warp-only camera orientation convention conversions. + +Mirrors :func:`isaaclab.utils.math.convert_camera_frame_orientation_convention` for +``wp.array(dtype=wp.quatf)`` inputs without involving torch. Each conversion is a +fixed quaternion post-multiplication, with the convention quaternions baked in at +import time from a small numpy computation. +""" + +from __future__ import annotations + +import math +from typing import Literal + +import numpy as np +import warp as wp + +# --- Conversion quaternion derivation --------------------------------------------------------- +# +# The torch reference (``isaaclab.utils.math.convert_camera_frame_orientation_convention``) +# expresses each conversion as a 3x3 rotation matrix multiplied on the right of the source +# rotation matrix. Equivalently, in quaternion form ``q_target = q_src * q_conv``. +# +# Conventions (each describing the camera local axes in their frame): +# * opengl: forward = -Z, up = +Y +# * ros: forward = +Z, up = -Y (180 deg rotation around X relative to opengl) +# * world: forward = +X, up = +Z +# +# All quaternions are stored in (x, y, z, w) order to match :class:`warp.quatf`. + + +def _euler_xyz_intrinsic_to_matrix(rx: float, ry: float, rz: float) -> np.ndarray: + """Intrinsic XYZ Euler angles to a 3x3 rotation matrix (R = Rx @ Ry @ Rz).""" + cx, sx = math.cos(rx), math.sin(rx) + cy, sy = math.cos(ry), math.sin(ry) + cz, sz = math.cos(rz), math.sin(rz) + rxm = np.array([[1.0, 0.0, 0.0], [0.0, cx, -sx], [0.0, sx, cx]]) + rym = np.array([[cy, 0.0, sy], [0.0, 1.0, 0.0], [-sy, 0.0, cy]]) + rzm = np.array([[cz, -sz, 0.0], [sz, cz, 0.0], [0.0, 0.0, 1.0]]) + return rxm @ rym @ rzm + + +def _matrix_to_quat_xyzw(rotm: np.ndarray) -> tuple[float, float, float, float]: + """Convert a 3x3 rotation matrix to a quaternion ``(x, y, z, w)``. + + Mirrors :func:`isaaclab.utils.math.quat_from_matrix` (the pytorch3d + algorithm) so the sign convention matches the torch reference for any + given input. + """ + m00, m01, m02 = float(rotm[0, 0]), float(rotm[0, 1]), float(rotm[0, 2]) + m10, m11, m12 = float(rotm[1, 0]), float(rotm[1, 1]), float(rotm[1, 2]) + m20, m21, m22 = float(rotm[2, 0]), float(rotm[2, 1]), float(rotm[2, 2]) + + q_abs_sq = ( + 1.0 + m00 + m11 + m22, # 4 * w^2 + 1.0 + m00 - m11 - m22, # 4 * x^2 + 1.0 - m00 + m11 - m22, # 4 * y^2 + 1.0 - m00 - m11 + m22, # 4 * z^2 + ) + q_abs = tuple(math.sqrt(max(0.0, v)) for v in q_abs_sq) + + # Candidate (x, y, z, w) tuples — one per branch. + candidates = ( + (m21 - m12, m02 - m20, m10 - m01, q_abs[0] ** 2), + (q_abs[1] ** 2, m10 + m01, m02 + m20, m21 - m12), + (m10 + m01, q_abs[2] ** 2, m12 + m21, m02 - m20), + (m20 + m02, m21 + m12, q_abs[3] ** 2, m10 - m01), + ) + + # Pick the branch with the largest q_abs (best-conditioned). Match the + # torch reference's denominator floor of 0.1 to keep the algorithm bit-equivalent. + idx = max(range(4), key=lambda i: q_abs[i]) + cand = candidates[idx] + denom = 2.0 * max(q_abs[idx], 0.1) + return cand[0] / denom, cand[1] / denom, cand[2] / denom, cand[3] / denom + + +# 180 deg rotation around X axis: q = (1, 0, 0, 0) in (x, y, z, w). 180-deg rotations are +# self-inverse, so the same quaternion serves both directions for ros<->opengl. +_Q_FLIP_YZ = (1.0, 0.0, 0.0, 0.0) +# The torch reference (``isaaclab.utils.math.convert_camera_frame_orientation_convention``) +# post-multiplies by matrix_from_euler([pi/2, -pi/2, 0], "XYZ") for world->opengl and by +# the *transpose* of the same matrix for opengl->world. As a quaternion, the transpose of +# a unit rotation matrix is the inverse — i.e. the conjugate of the corresponding quat. +_Q_WORLD_TO_OPENGL = _matrix_to_quat_xyzw(_euler_xyz_intrinsic_to_matrix(math.pi / 2, -math.pi / 2, 0.0)) +_Q_OPENGL_TO_WORLD = ( + -_Q_WORLD_TO_OPENGL[0], + -_Q_WORLD_TO_OPENGL[1], + -_Q_WORLD_TO_OPENGL[2], + _Q_WORLD_TO_OPENGL[3], +) + + +def _identity() -> tuple[float, float, float, float]: + return 0.0, 0.0, 0.0, 1.0 + + +def _quat_mul_xyzw( + a: tuple[float, float, float, float], b: tuple[float, float, float, float] +) -> tuple[float, float, float, float]: + """Hamilton product on (x, y, z, w) quaternions, returning a * b.""" + ax, ay, az, aw = a + bx, by, bz, bw = b + return ( + aw * bx + ax * bw + ay * bz - az * by, + aw * by - ax * bz + ay * bw + az * bx, + aw * bz + ax * by - ay * bx + az * bw, + aw * bw - ax * bx - ay * by - az * bz, + ) + + +def _resolve_conversion_quat( + origin: Literal["opengl", "ros", "world"], + target: Literal["opengl", "ros", "world"], +) -> tuple[float, float, float, float]: + """Return the right-multiplication quaternion that takes ``origin`` to ``target``.""" + if origin == target: + return _identity() + + # First take origin -> opengl on the right. + if origin == "ros": + to_opengl = _Q_FLIP_YZ + elif origin == "world": + to_opengl = _Q_WORLD_TO_OPENGL + else: # origin == "opengl" + to_opengl = _identity() + + # Then take opengl -> target on the right. + if target == "ros": + from_opengl = _Q_FLIP_YZ + elif target == "world": + from_opengl = _Q_OPENGL_TO_WORLD + else: # target == "opengl" + from_opengl = _identity() + + # Combined right-multiplier: q_target = q_src * to_opengl * from_opengl + return _quat_mul_xyzw(to_opengl, from_opengl) + + +@wp.kernel +def _quat_post_multiply_kernel( + src: wp.array(dtype=wp.quatf), + qx: float, + qy: float, + qz: float, + qw: float, + dst: wp.array(dtype=wp.quatf), +): + """For each i, dst[i] = src[i] * (qx, qy, qz, qw).""" + tid = wp.tid() + s = src[tid] + sx = s[0] + sy = s[1] + sz = s[2] + sw = s[3] + # Hamilton product (x, y, z, w) order. + rx = sw * qx + sx * qw + sy * qz - sz * qy + ry = sw * qy - sx * qz + sy * qw + sz * qx + rz = sw * qz + sx * qy - sy * qx + sz * qw + rw = sw * qw - sx * qx - sy * qy - sz * qz + dst[tid] = wp.quatf(rx, ry, rz, rw) + + +@wp.kernel +def _quat_xyzw_post_multiply_kernel( + src: wp.array(dtype=wp.float32, ndim=2), + qx: float, + qy: float, + qz: float, + qw: float, + dst: wp.array(dtype=wp.float32, ndim=2), +): + """Variant for ``(N, 4)`` float arrays storing (x, y, z, w) per row.""" + tid = wp.tid() + sx = src[tid, 0] + sy = src[tid, 1] + sz = src[tid, 2] + sw = src[tid, 3] + dst[tid, 0] = sw * qx + sx * qw + sy * qz - sz * qy + dst[tid, 1] = sw * qy - sx * qz + sy * qw + sz * qx + dst[tid, 2] = sw * qz + sx * qy - sy * qx + sz * qw + dst[tid, 3] = sw * qw - sx * qx - sy * qy - sz * qz + + +def convert_quat_array( + src: wp.array, + origin: Literal["opengl", "ros", "world"] = "opengl", + target: Literal["opengl", "ros", "world"] = "ros", +) -> wp.array: + """Convert a quaternion array from ``origin`` to ``target`` camera convention. + + Args: + src: Source quaternions. Either ``wp.array(dtype=wp.quatf, shape=(N,))`` or + ``wp.array(dtype=wp.float32, shape=(N, 4))`` with ``(x, y, z, w)`` rows. + origin: Source convention. + target: Target convention. + + Returns: + A freshly-allocated :class:`warp.array` of the same dtype/shape with the + converted quaternions. + """ + qx, qy, qz, qw = _resolve_conversion_quat(origin, target) + if origin == target: + # Defensive copy so callers can mutate the result without touching ``src``. + out = wp.empty_like(src) + wp.copy(out, src) + return out + + if src.dtype is wp.quatf: + n = int(src.shape[0]) + dst = wp.empty(n, dtype=wp.quatf, device=src.device) + wp.launch( + _quat_post_multiply_kernel, + dim=n, + inputs=[src, float(qx), float(qy), float(qz), float(qw)], + outputs=[dst], + device=src.device, + ) + return dst + + if src.dtype is wp.float32 and len(src.shape) == 2 and src.shape[1] == 4: + n = int(src.shape[0]) + dst = wp.empty((n, 4), dtype=wp.float32, device=src.device) + wp.launch( + _quat_xyzw_post_multiply_kernel, + dim=n, + inputs=[src, float(qx), float(qy), float(qz), float(qw)], + outputs=[dst], + device=src.device, + ) + return dst + + raise TypeError( + f"convert_quat_array: unsupported source array (dtype={src.dtype}, shape={src.shape})." + " Expected wp.quatf shape (N,) or wp.float32 shape (N, 4)." + ) diff --git a/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py b/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py index 56a8078c78b1..e0849b27abe2 100644 --- a/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py +++ b/source/isaaclab/isaaclab/sensors/ray_caster/ray_caster_camera.py @@ -37,11 +37,12 @@ logger = logging.getLogger(__name__) -def _to_device_tensor(value: wp.array | ProxyArray | torch.Tensor, device: str | torch.device) -> torch.Tensor: - """Normalize an array-like input to a ``torch.Tensor`` on ``device``. +def _as_torch_tensor(value: wp.array | ProxyArray | torch.Tensor, device: str | torch.device) -> torch.Tensor: + """Bring a wp/ProxyArray/torch input to a contiguous ``torch.Tensor`` on ``device``. - Accepts :class:`warp.array`, :class:`~isaaclab.utils.warp.proxy_array.ProxyArray`, - and :class:`torch.Tensor`. Warp inputs are unwrapped via ``wp.to_torch`` (zero-copy). + This helper exists only at the boundary where this class still calls into the + torch-based ``isaaclab.utils.math`` quaternion / pose utilities. The returned + tensor is a zero-copy view of the underlying warp memory whenever possible. """ if isinstance(value, ProxyArray): value = value.torch @@ -169,7 +170,7 @@ def set_intrinsic_matrices( if env_ids is None: env_ids = slice(None) # normalize matrices input to torch on sensor device - matrices = _to_device_tensor(matrices, self._device) + matrices = _as_torch_tensor(matrices, self._device) # save new intrinsic matrices and focal length (write via the zero-copy torch view) intrinsic_view = self._data.intrinsic_matrices.torch intrinsic_view[env_ids] = matrices @@ -251,12 +252,12 @@ def set_world_poses( pos_w_torch = pos_w.torch quat_w_torch = quat_w.torch if positions is not None: - positions = _to_device_tensor(positions, self._device) + positions = _as_torch_tensor(positions, self._device) # transform to camera frame pos_offset_world_frame = positions - pos_w_torch self._offset_pos[env_ids] = math_utils.quat_apply(math_utils.quat_inv(quat_w_torch), pos_offset_world_frame) if orientations is not None: - orientations = _to_device_tensor(orientations, self._device) + orientations = _as_torch_tensor(orientations, self._device) # convert rotation matrix from input convention to world quat_w_set = math_utils.convert_camera_frame_orientation_convention( orientations, origin=convention, target="world" @@ -291,8 +292,8 @@ def set_world_poses_from_view( RuntimeError: If the camera prim is not set. Need to call :meth:`initialize` method first. NotImplementedError: If the stage up-axis is not "Y" or "Z". """ - eyes = _to_device_tensor(eyes, self._device) - targets = _to_device_tensor(targets, self._device) + eyes = _as_torch_tensor(eyes, self._device) + targets = _as_torch_tensor(targets, self._device) # get up axis of current stage up_axis = UsdGeom.GetStageUpAxis(self.stage) # camera position and rotation in opengl convention diff --git a/source/isaaclab/isaaclab/utils/warp/kernels.py b/source/isaaclab/isaaclab/utils/warp/kernels.py index efcdbfe63f1e..c656e0f0e996 100644 --- a/source/isaaclab/isaaclab/utils/warp/kernels.py +++ b/source/isaaclab/isaaclab/utils/warp/kernels.py @@ -326,6 +326,32 @@ def raycast_dynamic_meshes_kernel( ray_mesh_id[tid_env, tid_ray] = wp.int16(tid_mesh_id) +@wp.kernel +def clamp_above_to_inf_kernel( + threshold: wp.float32, + target: wp.array(dtype=wp.float32, ndim=4), +): + """For every element ``v`` of ``target``, set ``v = +inf`` when ``v > threshold``. + + Used by camera renderers to enforce that out-of-range depth-style outputs + propagate as ``inf`` regardless of the upstream clipping convention. + """ + n, h, w, c = wp.tid() + if target[n, h, w, c] > threshold: + target[n, h, w, c] = wp.float32(wp.inf) + + +@wp.kernel +def replace_inf_kernel( + replacement: wp.float32, + target: wp.array(dtype=wp.float32, ndim=4), +): + """For every element ``v`` of ``target``, set ``v = replacement`` when ``isinf(v)``.""" + n, h, w, c = wp.tid() + if wp.isinf(target[n, h, w, c]): + target[n, h, w, c] = replacement + + @wp.kernel(enable_backward=False) def reshape_tiled_image( tiled_image_buffer: Any, diff --git a/source/isaaclab/test/renderers/test_camera_output_contract.py b/source/isaaclab/test/renderers/test_camera_output_contract.py index 208a814a796a..d8f26fa35058 100644 --- a/source/isaaclab/test/renderers/test_camera_output_contract.py +++ b/source/isaaclab/test/renderers/test_camera_output_contract.py @@ -9,6 +9,7 @@ import pytest import torch +import warp as wp pytest.importorskip("isaaclab_physx") @@ -148,10 +149,10 @@ def test_camera_data_allocates_supported_subset_and_aliases_rgb(): """CameraData allocates the intersection of requested + supported and aliases rgb into rgba.""" cfg = _make_camera_cfg(["rgb", "rgba", "depth"]) specs = { - RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8), - RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8), - RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32), - RenderBufferKind.NORMALS: RenderBufferSpec(3, torch.float32), + RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8), + RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8), + RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32), + RenderBufferKind.NORMALS: RenderBufferSpec(3, wp.float32), } data = CameraData.allocate( data_types=cfg.data_types, height=8, width=16, num_views=2, device="cpu", supported_specs=specs @@ -173,8 +174,8 @@ def test_camera_data_drops_requested_types_not_in_supported_specs(): """Requested types absent from supported_specs are absent from data.output.""" cfg = _make_camera_cfg(["rgb", "normals"]) specs = { - RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8), - RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8), + RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8), + RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8), } data = CameraData.allocate( data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=specs @@ -198,8 +199,8 @@ def test_camera_data_no_arg_construction_yields_empty_container(): def test_camera_data_segmentation_dtype_follows_supported_spec(): """CameraData consumes the layout dtype declared by the renderer spec.""" cfg = _make_camera_cfg(["instance_segmentation_fast"]) - raw_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(1, torch.int32)} - colorized_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(4, torch.uint8)} + raw_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(1, wp.int32)} + colorized_specs = {RenderBufferKind.INSTANCE_SEGMENTATION_FAST: RenderBufferSpec(4, wp.uint8)} raw = CameraData.allocate( data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=raw_specs @@ -218,9 +219,9 @@ def test_camera_data_output_proxyarray_zero_copy(): """Mutations through .torch are visible through .warp on every output buffer.""" cfg = _make_camera_cfg(["rgb", "rgba", "depth"]) specs = { - RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8), - RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8), - RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32), + RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8), + RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8), + RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32), } data = CameraData.allocate( data_types=cfg.data_types, height=4, width=4, num_views=1, device="cpu", supported_specs=specs @@ -237,7 +238,7 @@ def test_camera_data_output_proxyarray_zero_copy(): def test_camera_data_allocate_raises_on_unknown_name(): """An unknown data_types name raises ValueError naming the offender.""" - supported_specs = {RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8)} + supported_specs = {RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8)} with pytest.raises(ValueError) as exc_info: CameraData.allocate( data_types=["not_a_real_type"], diff --git a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py index d6e314ed2d46..e16537b7676b 100644 --- a/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py +++ b/source/isaaclab_newton/isaaclab_newton/renderers/newton_warp_renderer.py @@ -8,17 +8,17 @@ from __future__ import annotations import logging +import math from dataclasses import dataclass from typing import TYPE_CHECKING, Any import newton -import torch import warp as wp from isaaclab.renderers import BaseRenderer, RenderBufferKind, RenderBufferSpec from isaaclab.renderers.camera_render_spec import CameraRenderSpec +from isaaclab.sensors.camera.orientation_conventions import convert_quat_array from isaaclab.sim import SimulationContext -from isaaclab.utils.math import convert_camera_frame_orientation_convention from .newton_warp_renderer_cfg import NewtonWarpRendererCfg @@ -83,29 +83,50 @@ def get_output(self, output_name: str) -> wp.array: return None def update(self, positions: wp.array, orientations: wp.array, intrinsics: wp.array): - # The convention helper expects a torch tensor; use the zero-copy view of the wp.array. - converted_orientations = convert_camera_frame_orientation_convention( - wp.to_torch(orientations), origin="world", target="opengl" + # Convert world-frame orientations to opengl in-place via a warp kernel + # (no torch involvement on the data path). + orientations_quatf = ( + orientations + if orientations.dtype is wp.quatf + else wp.array( + ptr=orientations.ptr, + dtype=wp.quatf, + shape=(orientations.shape[0],), + device=orientations.device, + copy=False, + ) ) + converted_orientations = convert_quat_array(orientations_quatf, origin="world", target="opengl") self.camera_transforms = wp.empty( (1, self.newton_sensor.model.world_count), dtype=wp.transformf, device=self.newton_sensor.model.device ) + positions_vec3 = ( + positions + if positions.dtype is wp.vec3f + else wp.array( + ptr=positions.ptr, + dtype=wp.vec3f, + shape=(positions.shape[0],), + device=positions.device, + copy=False, + ) + ) wp.launch( RenderData._update_transforms, self.newton_sensor.model.world_count, - [positions, converted_orientations, self.camera_transforms], + [positions_vec3, converted_orientations, self.camera_transforms], device=self.newton_sensor.model.device, ) if self.camera_rays is None: - intrinsics_torch = wp.to_torch(intrinsics) - first_focal_length = intrinsics_torch[:, 1, 1][0:1] - fov_radians_all = 2.0 * torch.atan(self.height / (2.0 * first_focal_length)) - - self.camera_rays = self.newton_sensor.utils.compute_pinhole_camera_rays( - self.width, self.height, wp.from_torch(fov_radians_all, dtype=wp.float32) - ) + # Pull the focal length scalar via numpy (one element) to compute FOV + # without going through torch. + intrinsics_np = intrinsics.numpy() if intrinsics.device == "cpu" else intrinsics.numpy() + first_focal_length = float(intrinsics_np[0, 1, 1]) + fov_radians = 2.0 * math.atan(self.height / (2.0 * first_focal_length)) + fov_arr = wp.array([fov_radians], dtype=wp.float32, device=self.newton_sensor.model.device) + self.camera_rays = self.newton_sensor.utils.compute_pinhole_camera_rays(self.width, self.height, fov_arr) def _typed_view(self, wp_data: wp.array, dtype) -> wp.array: """Reinterpret the output buffer as ``(world_count, num_cameras, height, width)`` of ``dtype``. @@ -177,16 +198,14 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]: """Publish the per-output layout this Newton Warp backend writes. See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.supported_output_types`.""" seg_spec = ( - RenderBufferSpec(4, torch.uint8) - if self.cfg.colorize_instance_segmentation - else RenderBufferSpec(1, torch.int32) + RenderBufferSpec(4, wp.uint8) if self.cfg.colorize_instance_segmentation else RenderBufferSpec(1, wp.int32) ) return { - RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8), - RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8), - RenderBufferKind.ALBEDO: RenderBufferSpec(4, torch.uint8), - RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32), - RenderBufferKind.NORMALS: RenderBufferSpec(3, torch.float32), + RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8), + RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8), + RenderBufferKind.ALBEDO: RenderBufferSpec(4, wp.uint8), + RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32), + RenderBufferKind.NORMALS: RenderBufferSpec(3, wp.float32), RenderBufferKind.INSTANCE_SEGMENTATION_FAST: seg_spec, } diff --git a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py index 624b984c690d..5aa785ece5b3 100644 --- a/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py +++ b/source/isaaclab_ov/isaaclab_ov/renderers/ovrtx_renderer.py @@ -27,7 +27,6 @@ logger = logging.getLogger(__name__) import numpy as np -import torch import warp as wp # The ovrtx C library links to its own version of the USD libraries. Having @@ -41,7 +40,7 @@ from packaging.version import Version from isaaclab.renderers import BaseRenderer, RenderBufferKind, RenderBufferSpec -from isaaclab.utils.math import convert_camera_frame_orientation_convention +from isaaclab.sensors.camera.orientation_conventions import convert_quat_array from .ovrtx_renderer_cfg import OVRTXRendererCfg from .ovrtx_renderer_kernels import ( @@ -133,16 +132,16 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]: """Publish the per-output layout this OVRTX backend writes. See :meth:`~isaaclab.renderers.base_renderer.BaseRenderer.supported_output_types`.""" return { - RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8), - RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8), - RenderBufferKind.ALBEDO: RenderBufferSpec(4, torch.uint8), - RenderBufferKind.SIMPLE_SHADING_CONSTANT_DIFFUSE: RenderBufferSpec(3, torch.uint8), - RenderBufferKind.SIMPLE_SHADING_DIFFUSE_MDL: RenderBufferSpec(3, torch.uint8), - RenderBufferKind.SIMPLE_SHADING_FULL_MDL: RenderBufferSpec(3, torch.uint8), - RenderBufferKind.SEMANTIC_SEGMENTATION: RenderBufferSpec(4, torch.uint8), - RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32), - RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, torch.float32), - RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, torch.float32), + RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8), + RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8), + RenderBufferKind.ALBEDO: RenderBufferSpec(4, wp.uint8), + RenderBufferKind.SIMPLE_SHADING_CONSTANT_DIFFUSE: RenderBufferSpec(3, wp.uint8), + RenderBufferKind.SIMPLE_SHADING_DIFFUSE_MDL: RenderBufferSpec(3, wp.uint8), + RenderBufferKind.SIMPLE_SHADING_FULL_MDL: RenderBufferSpec(3, wp.uint8), + RenderBufferKind.SEMANTIC_SEGMENTATION: RenderBufferSpec(4, wp.uint8), + RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32), + RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, wp.float32), + RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, wp.float32), } def __init__(self, cfg: OVRTXRendererCfg): @@ -367,15 +366,8 @@ def create_render_data(self, spec: CameraRenderSpec) -> OVRTXRenderData: self.initialize(spec) return OVRTXRenderData(spec, DEVICE) - # Map torch dtypes to their warp counterparts for zero-copy wrapping. - _TORCH_TO_WP_DTYPE: dict[torch.dtype, Any] = { - torch.uint8: wp.uint8, - torch.float32: wp.float32, - torch.int32: wp.int32, - } - def set_outputs(self, render_data: OVRTXRenderData, output_data: dict[str, wp.array]) -> None: - """Wrap caller-owned ``wp.array`` output buffers with the strict per-kind dtype OVRTX expects. + """Wrap caller-owned ``wp.array`` output buffers under the strict per-kind dtype OVRTX expects. Aliased views over a contiguous sibling (e.g. ``rgb`` over ``rgba``) are skipped; any other non-contiguous array raises ``ValueError``. @@ -392,20 +384,8 @@ def set_outputs(self, render_data: OVRTXRenderData, output_data: dict[str, wp.ar f"OVRTXRenderer.set_outputs: output '{name}' is non-contiguous and is not an" " alias of a contiguous output buffer; cannot rewrap with the OVRTX dtype." ) - torch_dtype = wp.to_torch(arr).dtype - wp_dtype = self._TORCH_TO_WP_DTYPE.get(torch_dtype) - if wp_dtype is None: - raise ValueError( - f"OVRTXRenderer.set_outputs: unsupported dtype {torch_dtype} for output" - f" '{name}'. Add it to OVRTXRenderer._TORCH_TO_WP_DTYPE." - ) - render_data.warp_buffers[name] = wp.array( - ptr=arr.ptr, - dtype=wp_dtype, - shape=tuple(arr.shape), - device=arr.device, - copy=False, - ) + # Camera spec dtype is already a warp dtype; keep the buffer as-is. + render_data.warp_buffers[name] = arr def update_transforms(self) -> None: """Sync physics objects to OVRTX.""" @@ -443,14 +423,24 @@ def update_camera( ) -> None: """Update camera transforms in OVRTX binding.""" num_envs = positions.shape[0] - # convention helper expects torch; use the zero-copy view of the wp.array - camera_quats_opengl = convert_camera_frame_orientation_convention( - wp.to_torch(orientations), origin="world", target="opengl" + # Convert orientations from world to opengl convention via a warp kernel. + orientations_quatf = ( + orientations + if orientations.dtype is wp.quatf + else wp.array( + ptr=orientations.ptr, + dtype=wp.quatf, + shape=(num_envs,), + device=orientations.device, + copy=False, + ) ) - camera_positions_wp = wp.array( - ptr=positions.ptr, dtype=wp.vec3, shape=(positions.shape[0],), device=positions.device, copy=False + camera_orientations_wp = convert_quat_array(orientations_quatf, origin="world", target="opengl") + camera_positions_wp = ( + positions + if positions.dtype is wp.vec3 + else wp.array(ptr=positions.ptr, dtype=wp.vec3, shape=(num_envs,), device=positions.device, copy=False) ) - camera_orientations_wp = wp.from_torch(camera_quats_opengl.contiguous(), dtype=wp.quatf) camera_transforms = wp.zeros(num_envs, dtype=wp.mat44d, device=DEVICE) wp.launch( kernel=create_camera_transforms_kernel, @@ -573,8 +563,14 @@ def _process_render_frame(self, render_data: OVRTXRenderData, frame, output_buff with frame.render_vars[depth_var].map(device=Device.CUDA) as mapping: tiled_depth_data = wp.from_dlpack(mapping.tensor) if tiled_depth_data.dtype == wp.uint32: - tiled_depth_data = wp.from_torch( - wp.to_torch(tiled_depth_data).view(torch.float32), dtype=wp.float32 + # Reinterpret in place as float32 (same bit width). Pure warp. + tiled_depth_data = wp.array( + ptr=tiled_depth_data.ptr, + dtype=wp.float32, + shape=tiled_depth_data.shape, + strides=tiled_depth_data.strides, + device=tiled_depth_data.device, + copy=False, ) self._extract_depth_tiles(render_data, tiled_depth_data, output_buffers) break @@ -590,15 +586,17 @@ def _process_render_frame(self, render_data: OVRTXRenderData, frame, output_buff if tiled_semantic_data.dtype == wp.uint32: semantic_colors = self._generate_random_colors_from_ids(tiled_semantic_data) - - semantic_torch = wp.to_torch(semantic_colors) - semantic_uint8 = semantic_torch.view(torch.uint8) - - if semantic_torch.dim() == 2: - h, w = semantic_torch.shape - semantic_uint8 = semantic_uint8.reshape(h, w, 4) - - tiled_semantic_data = wp.from_torch(semantic_uint8, dtype=wp.uint8) + # Reinterpret each uint32 as 4 uint8 bytes by adding a trailing + # channel axis. Pure warp; no torch involvement. + base_shape = tuple(semantic_colors.shape) + new_shape = base_shape + (4,) if len(base_shape) == 2 else base_shape + tiled_semantic_data = wp.array( + ptr=semantic_colors.ptr, + dtype=wp.uint8, + shape=new_shape, + device=semantic_colors.device, + copy=False, + ) self._extract_rgba_tiles( render_data, diff --git a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py index 10f90e53c958..d8ca957ac72c 100644 --- a/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py +++ b/source/isaaclab_physx/isaaclab_physx/renderers/isaac_rtx_renderer.py @@ -14,7 +14,6 @@ from typing import TYPE_CHECKING, Any import numpy as np -import torch import warp as wp from packaging import version @@ -24,7 +23,7 @@ from isaaclab.renderers import BaseRenderer, RenderBufferKind, RenderBufferSpec from isaaclab.renderers.camera_render_spec import CameraRenderSpec from isaaclab.utils.version import get_isaac_sim_version -from isaaclab.utils.warp.kernels import reshape_tiled_image +from isaaclab.utils.warp.kernels import clamp_above_to_inf_kernel, replace_inf_kernel, reshape_tiled_image from .isaac_rtx_renderer_utils import ensure_isaac_rtx_render_update, ensure_rtx_hydra_engine_attached @@ -107,19 +106,19 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]: specs: dict[RenderBufferKind, RenderBufferSpec] = { # Replicator's native layout for color output is rgba/uint8; # ``Camera`` aliases ``rgb`` as a view into ``rgba`` storage. - RenderBufferKind.RGBA: RenderBufferSpec(4, torch.uint8), - RenderBufferKind.RGB: RenderBufferSpec(3, torch.uint8), - RenderBufferKind.DEPTH: RenderBufferSpec(1, torch.float32), - RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, torch.float32), - RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, torch.float32), - RenderBufferKind.NORMALS: RenderBufferSpec(3, torch.float32), - RenderBufferKind.MOTION_VECTORS: RenderBufferSpec(2, torch.float32), + RenderBufferKind.RGBA: RenderBufferSpec(4, wp.uint8), + RenderBufferKind.RGB: RenderBufferSpec(3, wp.uint8), + RenderBufferKind.DEPTH: RenderBufferSpec(1, wp.float32), + RenderBufferKind.DISTANCE_TO_IMAGE_PLANE: RenderBufferSpec(1, wp.float32), + RenderBufferKind.DISTANCE_TO_CAMERA: RenderBufferSpec(1, wp.float32), + RenderBufferKind.NORMALS: RenderBufferSpec(3, wp.float32), + RenderBufferKind.MOTION_VECTORS: RenderBufferSpec(2, wp.float32), } if sim_major >= 6: - specs[RenderBufferKind.ALBEDO] = RenderBufferSpec(4, torch.uint8) + specs[RenderBufferKind.ALBEDO] = RenderBufferSpec(4, wp.uint8) for shading_type in SIMPLE_SHADING_MODES: - specs[RenderBufferKind(shading_type)] = RenderBufferSpec(3, torch.uint8) + specs[RenderBufferKind(shading_type)] = RenderBufferSpec(3, wp.uint8) seg_specs = ( (RenderBufferKind.SEMANTIC_SEGMENTATION, self.cfg.colorize_semantic_segmentation), @@ -127,7 +126,7 @@ def supported_output_types(self) -> dict[RenderBufferKind, RenderBufferSpec]: (RenderBufferKind.INSTANCE_ID_SEGMENTATION_FAST, self.cfg.colorize_instance_id_segmentation), ) for name, colorize in seg_specs: - specs[name] = RenderBufferSpec(4, torch.uint8) if colorize else RenderBufferSpec(1, torch.int32) + specs[name] = RenderBufferSpec(4, wp.uint8) if colorize else RenderBufferSpec(1, wp.int32) return specs @@ -332,13 +331,18 @@ def tiling_grid_shape(): else: tiled_data_buffer = output - # convert data buffer to warp array + # convert data buffer to warp array. Replicator may return either a + # numpy array (CPU staging) or a torch tensor (GPU device-resident); + # both are wrapped via warp's interop without going through torch ops. if isinstance(tiled_data_buffer, np.ndarray): - # Let warp infer the dtype from numpy array instead of hardcoding uint8 + # Let warp infer the dtype from numpy array instead of hardcoding uint8. # Different annotators return different dtypes: RGB(uint8), depth(float32), segmentation(uint32) tiled_data_buffer = wp.array(tiled_data_buffer, device=device) + elif isinstance(tiled_data_buffer, wp.array): + pass else: - tiled_data_buffer = tiled_data_buffer.to(device=device) + # torch.Tensor path: wrap as a zero-copy wp.array (no torch op). + tiled_data_buffer = wp.from_torch(tiled_data_buffer.contiguous()) # process data for different segmentation types # Note: Replicator returns raw buffers of dtype uint32 for segmentation types @@ -385,18 +389,24 @@ def tiling_grid_shape(): # in values larger than the clipping range in the output. We apply an additional clipping to # ensure values are within the clipping range for all the annotators. if data_type == "distance_to_camera": - # Use the zero-copy torch view of the wp.array primary for the in-place clip. - view = wp.to_torch(output_wp) - view[view > cfg.spawn.clipping_range[1]] = torch.inf + wp.launch( + kernel=clamp_above_to_inf_kernel, + dim=output_wp.shape, + inputs=[float(cfg.spawn.clipping_range[1]), output_wp], + device=device, + ) # apply defined clipping behavior if ( data_type in ("distance_to_camera", "distance_to_image_plane", "depth") and self.cfg.depth_clipping_behavior != "none" ): - view = wp.to_torch(output_wp) - view[torch.isinf(view)] = ( - 0.0 if self.cfg.depth_clipping_behavior == "zero" else cfg.spawn.clipping_range[1] + replacement = 0.0 if self.cfg.depth_clipping_behavior == "zero" else cfg.spawn.clipping_range[1] + wp.launch( + kernel=replace_inf_kernel, + dim=output_wp.shape, + inputs=[float(replacement), output_wp], + device=device, ) def read_output(self, render_data: IsaacRtxRenderData, camera_data: CameraData) -> None: From 7d0b4d8977d2c933e7106b821c7394bfd30faa93 Mon Sep 17 00:00:00 2001 From: Horde Date: Fri, 8 May 2026 00:42:56 +0000 Subject: [PATCH 3/3] Updates --- .../isaaclab/sensors/camera/camera.py | 43 ++++++++++++------- .../sensors/camera/orientation_conventions.py | 35 +++++++++------ .../changelog.d/camera-data-proxyarray.rst | 7 +++ .../changelog.d/camera-data-proxyarray.rst | 10 +++++ 4 files changed, 66 insertions(+), 29 deletions(-) create mode 100644 source/isaaclab_contrib/changelog.d/camera-data-proxyarray.rst create mode 100644 source/isaaclab_tasks/changelog.d/camera-data-proxyarray.rst diff --git a/source/isaaclab/isaaclab/sensors/camera/camera.py b/source/isaaclab/isaaclab/sensors/camera/camera.py index b63f44a044ce..7a8ecb368810 100644 --- a/source/isaaclab/isaaclab/sensors/camera/camera.py +++ b/source/isaaclab/isaaclab/sensors/camera/camera.py @@ -576,11 +576,15 @@ def _create_buffers(self): ) # Camera-frame state (pose / intrinsics) is owned by the camera, not # the renderer: populate it on the freshly constructed CameraData. + # ``pos_w`` / ``quat_w_world`` are stored as plain float32 arrays with + # explicit ``(N, 3)`` / ``(N, 4)`` layout so ``ProxyArray.shape`` + # matches the documented shape; warp kernels reinterpret them as + # ``wp.vec3`` / ``wp.quatf`` views (zero-copy) when needed. n = self._view.count self._data.intrinsic_matrices = ProxyArray(wp.zeros((n, 3, 3), dtype=wp.float32, device=device_str)) self._update_intrinsic_matrices(list(range(n))) - self._data.pos_w = ProxyArray(wp.zeros(n, dtype=wp.vec3, device=device_str)) - self._data.quat_w_world = ProxyArray(wp.zeros(n, dtype=wp.quatf, device=device_str)) + self._data.pos_w = ProxyArray(wp.zeros((n, 3), dtype=wp.float32, device=device_str)) + self._data.quat_w_world = ProxyArray(wp.zeros((n, 4), dtype=wp.float32, device=device_str)) self._update_poses_wp(self._ALL_INDICES) # Hand the renderer the underlying wp.array buffers; the ProxyArray # wrappers in CameraData stay valid since both views share memory. @@ -639,29 +643,36 @@ def _update_poses_wp(self, env_ids_wp: wp.array): # Convert quat_w (opengl) -> world via a warp kernel and scatter into CameraData. n_pose = int(env_ids_wp.shape[0]) quat_w_world_local = convert_quat_array(quat_w_proxy.warp, origin="opengl", target="world") - # Build a temporary wp.bool mask covering env_ids_wp so we can scatter through - # the existing masked_set kernels. - mask_local = wp.zeros(self._view.count, dtype=wp.bool, device=self._device) - wp.launch( - camera_kernels.indices_to_mask_kernel, - dim=n_pose, - inputs=[env_ids_wp, mask_local], - device=self._device, + n_total = self._view.count + # Reinterpret the (N, 3) / (N, 4) float32 storage as wp.vec3 / wp.quatf + # views so the existing scatter kernels (which take typed inputs) can be + # reused without copying. + pos_dst = self._data.pos_w.warp + quat_dst = self._data.quat_w_world.warp + pos_dst_vec3 = wp.array( + ptr=pos_dst.ptr, + dtype=wp.vec3, + shape=(n_total,), + device=pos_dst.device, + copy=False, + ) + quat_dst_quatf = wp.array( + ptr=quat_dst.ptr, + dtype=wp.quatf, + shape=(n_total,), + device=quat_dst.device, + copy=False, ) - # Scatter pos and quat into self._data via masked copies. - # Note: the view's pos/quat arrays are already the same length as env_ids_wp, - # so we copy them into a full-sized scratch first via index, then mask-copy. - # For correctness, we instead scatter directly from the (M,) arrays using an indexed kernel. wp.launch( camera_kernels.scatter_vec3f_kernel, dim=n_pose, - inputs=[env_ids_wp, pos_w_proxy.warp, self._data.pos_w.warp], + inputs=[env_ids_wp, pos_w_proxy.warp, pos_dst_vec3], device=self._device, ) wp.launch( camera_kernels.scatter_quatf_kernel, dim=n_pose, - inputs=[env_ids_wp, quat_w_world_local, self._data.quat_w_world.warp], + inputs=[env_ids_wp, quat_w_world_local, quat_dst_quatf], device=self._device, ) # notify renderer of updated poses (guarded in case called before initialization completes) diff --git a/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py b/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py index b9ed24778e98..909495a70c34 100644 --- a/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py +++ b/source/isaaclab/isaaclab/sensors/camera/orientation_conventions.py @@ -13,6 +13,7 @@ from __future__ import annotations +import functools import math from typing import Literal @@ -82,17 +83,25 @@ def _matrix_to_quat_xyzw(rotm: np.ndarray) -> tuple[float, float, float, float]: # 180 deg rotation around X axis: q = (1, 0, 0, 0) in (x, y, z, w). 180-deg rotations are # self-inverse, so the same quaternion serves both directions for ros<->opengl. _Q_FLIP_YZ = (1.0, 0.0, 0.0, 0.0) -# The torch reference (``isaaclab.utils.math.convert_camera_frame_orientation_convention``) -# post-multiplies by matrix_from_euler([pi/2, -pi/2, 0], "XYZ") for world->opengl and by -# the *transpose* of the same matrix for opengl->world. As a quaternion, the transpose of -# a unit rotation matrix is the inverse — i.e. the conjugate of the corresponding quat. -_Q_WORLD_TO_OPENGL = _matrix_to_quat_xyzw(_euler_xyz_intrinsic_to_matrix(math.pi / 2, -math.pi / 2, 0.0)) -_Q_OPENGL_TO_WORLD = ( - -_Q_WORLD_TO_OPENGL[0], - -_Q_WORLD_TO_OPENGL[1], - -_Q_WORLD_TO_OPENGL[2], - _Q_WORLD_TO_OPENGL[3], -) + + +@functools.cache +def _q_world_to_opengl() -> tuple[float, float, float, float]: + """The torch reference (``isaaclab.utils.math.convert_camera_frame_orientation_convention``) + post-multiplies by ``matrix_from_euler([pi/2, -pi/2, 0], "XYZ")`` for world->opengl. + + Computed lazily so importing this module under tools that mock ``numpy`` + (e.g. Sphinx ``autodoc_mock_imports``) does not fail at module load. + """ + return _matrix_to_quat_xyzw(_euler_xyz_intrinsic_to_matrix(math.pi / 2, -math.pi / 2, 0.0)) + + +@functools.cache +def _q_opengl_to_world() -> tuple[float, float, float, float]: + """Inverse of :func:`_q_world_to_opengl` — i.e. the conjugate, since the + underlying rotation is unit-norm. Lazy for the same reason.""" + qx, qy, qz, qw = _q_world_to_opengl() + return -qx, -qy, -qz, qw def _identity() -> tuple[float, float, float, float]: @@ -125,7 +134,7 @@ def _resolve_conversion_quat( if origin == "ros": to_opengl = _Q_FLIP_YZ elif origin == "world": - to_opengl = _Q_WORLD_TO_OPENGL + to_opengl = _q_world_to_opengl() else: # origin == "opengl" to_opengl = _identity() @@ -133,7 +142,7 @@ def _resolve_conversion_quat( if target == "ros": from_opengl = _Q_FLIP_YZ elif target == "world": - from_opengl = _Q_OPENGL_TO_WORLD + from_opengl = _q_opengl_to_world() else: # target == "opengl" from_opengl = _identity() diff --git a/source/isaaclab_contrib/changelog.d/camera-data-proxyarray.rst b/source/isaaclab_contrib/changelog.d/camera-data-proxyarray.rst new file mode 100644 index 000000000000..8c30136624ef --- /dev/null +++ b/source/isaaclab_contrib/changelog.d/camera-data-proxyarray.rst @@ -0,0 +1,7 @@ +Changed +^^^^^^^ + +* :class:`~isaaclab_contrib.sensors.tacsl_sensor.VisuotactileSensor` now accesses + :attr:`~isaaclab.sensors.camera.CameraData.output` entries through their + :attr:`~isaaclab.utils.warp.proxy_array.ProxyArray.torch` view to match the + Warp-first :class:`CameraData` storage. diff --git a/source/isaaclab_tasks/changelog.d/camera-data-proxyarray.rst b/source/isaaclab_tasks/changelog.d/camera-data-proxyarray.rst new file mode 100644 index 000000000000..74b5238d20ae --- /dev/null +++ b/source/isaaclab_tasks/changelog.d/camera-data-proxyarray.rst @@ -0,0 +1,10 @@ +Changed +^^^^^^^ + +* Updated the cartpole / shadow-hand vision RL envs and the dexsuite + ``vision_camera`` observation term to read camera outputs through + :attr:`~isaaclab.utils.warp.proxy_array.ProxyArray.torch`, matching the + new Warp-first :class:`~isaaclab.sensors.camera.CameraData` storage. + Files touched: ``direct/cartpole/cartpole_camera_env.py``, + ``direct/shadow_hand/shadow_hand_vision_env.py``, + ``manager_based/manipulation/dexsuite/mdp/observations.py``.