From 8952b1ec1a71111c2b457df324e57526f1b5cc08 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 8 Oct 2025 14:36:14 -0700 Subject: [PATCH 1/5] separated detection3d and detection3dpc --- dimos/perception/detection2d/conftest.py | 16 +- dimos/perception/detection2d/module3D.py | 7 +- dimos/perception/detection2d/type/__init__.py | 9 +- .../detection2d/type/detection3d.py | 180 +------------ .../detection2d/type/detection3dpc.py | 247 ++++++++++++++++++ .../detection2d/type/test_detection3d.py | 44 ++-- 6 files changed, 293 insertions(+), 210 deletions(-) create mode 100644 dimos/perception/detection2d/type/detection3dpc.py diff --git a/dimos/perception/detection2d/conftest.py b/dimos/perception/detection2d/conftest.py index b19cdcbd00..abb7f01174 100644 --- a/dimos/perception/detection2d/conftest.py +++ b/dimos/perception/detection2d/conftest.py @@ -28,8 +28,10 @@ from dimos.perception.detection2d.type import ( Detection2D, Detection3D, + Detection3DPC, ImageDetections2D, ImageDetections3D, + ImageDetections3DPC, ) from dimos.protocol.tf import TF from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule @@ -47,7 +49,7 @@ class Moment(TypedDict, total=False): transforms: list[Transform] tf: TF annotations: Optional[ImageAnnotations] - detections: Optional[ImageDetections3D] + detections: Optional[ImageDetections3DPC] markers: Optional[MarkerArray] scene_update: Optional[SceneUpdate] @@ -57,7 +59,7 @@ class Moment2D(Moment): class Moment3D(Moment): - detections3d: ImageDetections3D + detections3dpc: ImageDetections3D @pytest.fixture @@ -110,11 +112,11 @@ def detection2d(get_moment_2d) -> Detection2D: @pytest.fixture -def detection3d(get_moment_3d) -> Detection3D: +def detection3dpc(get_moment_3d) -> Detection3DPC: moment = get_moment_3d(seek=10.0) - assert len(moment["detections3d"]) > 0, "No detections found in the moment" - print(moment["detections3d"]) - return moment["detections3d"][0] + assert len(moment["detections3dpc"]) > 0, "No detections found in the moment" + print(moment["detections3dpc"]) + return moment["detections3dpc"][0] @pytest.fixture @@ -150,7 +152,7 @@ def moment_provider(**kwargs) -> Moment2D: return { **moment, - "detections3d": module.process_frame( + "detections3dpc": module.process_frame( moment["detections2d"], moment["lidar_frame"], camera_transform ), } diff --git a/dimos/perception/detection2d/module3D.py b/dimos/perception/detection2d/module3D.py index 0ad3517bf5..21fc1ca7ef 100644 --- a/dimos/perception/detection2d/module3D.py +++ b/dimos/perception/detection2d/module3D.py @@ -24,8 +24,9 @@ from dimos.perception.detection2d.type import ( ImageDetections2D, ImageDetections3D, + ImageDetections3DPC, ) -from dimos.perception.detection2d.type.detection3d import Detection3D +from dimos.perception.detection2d.type.detection3dpc import Detection3DPC from dimos.types.timestamped import align_timestamped from dimos.utils.reactive import backpressure @@ -58,7 +59,7 @@ def process_frame( print("3d projection", detections, pointcloud, transform) detection3d_list = [] for detection in detections: - detection3d = Detection3D.from_2d( + detection3d = Detection3DPC.from_2d( detection, world_pointcloud=pointcloud, camera_info=self.camera_info, @@ -67,7 +68,7 @@ def process_frame( if detection3d is not None: detection3d_list.append(detection3d) - ret = ImageDetections3D(detections.image, detection3d_list) + ret = ImageDetections3DPC(detections.image, detection3d_list) print("3d projection finished", ret) return ret diff --git a/dimos/perception/detection2d/type/__init__.py b/dimos/perception/detection2d/type/__init__.py index fb7c435c0c..cffd5bb401 100644 --- a/dimos/perception/detection2d/type/__init__.py +++ b/dimos/perception/detection2d/type/__init__.py @@ -3,5 +3,12 @@ ImageDetections2D, InconvinientDetectionFormat, ) -from dimos.perception.detection2d.type.detection3d import Detection3D, ImageDetections3D +from dimos.perception.detection2d.type.detection3d import ( + Detection3D, + ImageDetections3D, +) +from dimos.perception.detection2d.type.detection3dpc import ( + Detection3DPC, + ImageDetections3DPC, +) from dimos.perception.detection2d.type.imageDetections import ImageDetections, TableStr diff --git a/dimos/perception/detection2d/type/detection3d.py b/dimos/perception/detection2d/type/detection3d.py index 8c0919b700..17d6a6d4d4 100644 --- a/dimos/perception/detection2d/type/detection3d.py +++ b/dimos/perception/detection2d/type/detection3d.py @@ -32,195 +32,21 @@ from dimos.perception.detection2d.type.imageDetections import ImageDetections from dimos.types.timestamped import to_ros_stamp -Detection3DFilter = Callable[ - [Detection2D, PointCloud2, CameraInfo, Transform], Optional["Detection3D"] -] - - -def height_filter(height=0.1) -> Detection3DFilter: - return lambda det, pc, ci, tf: pc.filter_by_height(height) - - -def statistical(nb_neighbors=40, std_ratio=0.5) -> Detection3DFilter: - def filter_func( - det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform - ) -> Optional[PointCloud2]: - try: - statistical, removed = pc.pointcloud.remove_statistical_outlier( - nb_neighbors=nb_neighbors, std_ratio=std_ratio - ) - return PointCloud2(statistical, pc.frame_id, pc.ts) - except Exception as e: - # print("statistical filter failed:", e) - return None - - return filter_func - - -def raycast() -> Detection3DFilter: - def filter_func( - det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform - ) -> Optional[PointCloud2]: - try: - camera_pos = tf.inverse().translation - camera_pos_np = camera_pos.to_numpy() - _, visible_indices = pc.pointcloud.hidden_point_removal(camera_pos_np, radius=100.0) - visible_pcd = pc.pointcloud.select_by_index(visible_indices) - return PointCloud2(visible_pcd, pc.frame_id, pc.ts) - except Exception as e: - # print("raycast filter failed:", e) - return None - - return filter_func - - -def radius_outlier(min_neighbors: int = 20, radius: float = 0.3) -> Detection3DFilter: - """ - Remove isolated points: keep only points that have at least `min_neighbors` - neighbors within `radius` meters (same units as your point cloud). - """ - - def filter_func( - det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform - ) -> Optional[PointCloud2]: - filtered_pcd, removed = pc.pointcloud.remove_radius_outlier( - nb_points=min_neighbors, radius=radius - ) - return PointCloud2(filtered_pcd, pc.frame_id, pc.ts) - - return filter_func - @dataclass class Detection3D(Detection2D): - pointcloud: PointCloud2 transform: Transform - frame_id: str = "unknown" + frame_id: str @classmethod def from_2d( cls, det: Detection2D, - world_pointcloud: PointCloud2, + distance: float, camera_info: CameraInfo, world_to_optical_transform: Transform, - # filters are to be adjusted based on the sensor noise characteristics if feeding - # sensor data directly - filters: list[Callable[[PointCloud2], PointCloud2]] = [ - # height_filter(0.1), - raycast(), - radius_outlier(), - statistical(), - ], ) -> Optional["Detection3D"]: - """Create a Detection3D from a 2D detection by projecting world pointcloud. - - This method handles: - 1. Projecting world pointcloud to camera frame - 2. Filtering points within the 2D detection bounding box - 3. Cleaning up the pointcloud (height filter, outlier removal) - 4. Hidden point removal from camera perspective - - Args: - det: The 2D detection - world_pointcloud: Full pointcloud in world frame - camera_info: Camera calibration info - world_to_camerlka_transform: Transform from world to camera frame - filters: List of functions to apply to the pointcloud for filtering - Returns: - Detection3D with filtered pointcloud, or None if no valid points - """ - # Extract camera parameters - fx, fy = camera_info.K[0], camera_info.K[4] - cx, cy = camera_info.K[2], camera_info.K[5] - image_width = camera_info.width - image_height = camera_info.height - - camera_matrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) - - # Convert pointcloud to numpy array - world_points = world_pointcloud.as_numpy() - - # Project points to camera frame - points_homogeneous = np.hstack([world_points, np.ones((world_points.shape[0], 1))]) - extrinsics_matrix = world_to_optical_transform.to_matrix() - points_camera = (extrinsics_matrix @ points_homogeneous.T).T - - # Filter out points behind the camera - valid_mask = points_camera[:, 2] > 0 - points_camera = points_camera[valid_mask] - world_points = world_points[valid_mask] - - if len(world_points) == 0: - return None - - # Project to 2D - points_2d_homogeneous = (camera_matrix @ points_camera[:, :3].T).T - points_2d = points_2d_homogeneous[:, :2] / points_2d_homogeneous[:, 2:3] - - # Filter points within image bounds - in_image_mask = ( - (points_2d[:, 0] >= 0) - & (points_2d[:, 0] < image_width) - & (points_2d[:, 1] >= 0) - & (points_2d[:, 1] < image_height) - ) - points_2d = points_2d[in_image_mask] - world_points = world_points[in_image_mask] - - if len(world_points) == 0: - return None - - # Extract bbox from Detection2D - x_min, y_min, x_max, y_max = det.bbox - - # Find points within this detection box (with small margin) - margin = 5 # pixels - in_box_mask = ( - (points_2d[:, 0] >= x_min - margin) - & (points_2d[:, 0] <= x_max + margin) - & (points_2d[:, 1] >= y_min - margin) - & (points_2d[:, 1] <= y_max + margin) - ) - - detection_points = world_points[in_box_mask] - - if detection_points.shape[0] == 0: - # print(f"No points found in detection bbox after projection. {det.name}") - return None - - # Create initial pointcloud for this detection - initial_pc = PointCloud2.from_numpy( - detection_points, - frame_id=world_pointcloud.frame_id, - timestamp=world_pointcloud.ts, - ) - - # Apply filters - each filter needs all 4 arguments - detection_pc = initial_pc - for filter_func in filters: - result = filter_func(det, detection_pc, camera_info, world_to_optical_transform) - if result is None: - return None - detection_pc = result - - # Final check for empty pointcloud - if len(detection_pc.pointcloud.points) == 0: - return None - - # Create Detection3D with filtered pointcloud - return Detection3D( - image=det.image, - bbox=det.bbox, - track_id=det.track_id, - class_id=det.class_id, - confidence=det.confidence, - name=det.name, - ts=det.ts, - pointcloud=detection_pc, - transform=world_to_optical_transform, - frame_id=world_pointcloud.frame_id, - ) + raise NotImplementedError() @functools.cached_property def center(self) -> Vector3: diff --git a/dimos/perception/detection2d/type/detection3dpc.py b/dimos/perception/detection2d/type/detection3dpc.py new file mode 100644 index 0000000000..44d242de9e --- /dev/null +++ b/dimos/perception/detection2d/type/detection3dpc.py @@ -0,0 +1,247 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import functools +from dataclasses import dataclass +from typing import Any, Callable, Dict, Optional, TypeVar + +import numpy as np +from dimos_lcm.sensor_msgs import CameraInfo +from lcm_msgs.builtin_interfaces import Duration +from lcm_msgs.foxglove_msgs import CubePrimitive, SceneEntity, SceneUpdate, TextPrimitive +from lcm_msgs.geometry_msgs import Point, Pose, Quaternion +from lcm_msgs.geometry_msgs import Vector3 as LCMVector3 + +from dimos.msgs.foxglove_msgs.Color import Color +from dimos.msgs.geometry_msgs import PoseStamped, Transform, Vector3 +from dimos.msgs.sensor_msgs import PointCloud2 +from dimos.perception.detection2d.type.detection2d import Detection2D +from dimos.perception.detection2d.type.detection3d import Detection3D +from dimos.perception.detection2d.type.imageDetections import ImageDetections +from dimos.types.timestamped import to_ros_stamp + +Detection3DPCFilter = Callable[ + [Detection2D, PointCloud2, CameraInfo, Transform], Optional["Detection3DPC"] +] + + +def height_filter(height=0.1) -> Detection3DPCFilter: + return lambda det, pc, ci, tf: pc.filter_by_height(height) + + +def statistical(nb_neighbors=40, std_ratio=0.5) -> Detection3DPCFilter: + def filter_func( + det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform + ) -> Optional[PointCloud2]: + try: + statistical, removed = pc.pointcloud.remove_statistical_outlier( + nb_neighbors=nb_neighbors, std_ratio=std_ratio + ) + return PointCloud2(statistical, pc.frame_id, pc.ts) + except Exception as e: + # print("statistical filter failed:", e) + return None + + return filter_func + + +def raycast() -> Detection3DPCFilter: + def filter_func( + det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform + ) -> Optional[PointCloud2]: + try: + camera_pos = tf.inverse().translation + camera_pos_np = camera_pos.to_numpy() + _, visible_indices = pc.pointcloud.hidden_point_removal(camera_pos_np, radius=100.0) + visible_pcd = pc.pointcloud.select_by_index(visible_indices) + return PointCloud2(visible_pcd, pc.frame_id, pc.ts) + except Exception as e: + # print("raycast filter failed:", e) + return None + + return filter_func + + +def radius_outlier(min_neighbors: int = 20, radius: float = 0.3) -> Detection3DPCFilter: + """ + Remove isolated points: keep only points that have at least `min_neighbors` + neighbors within `radius` meters (same units as your point cloud). + """ + + def filter_func( + det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform + ) -> Optional[PointCloud2]: + filtered_pcd, removed = pc.pointcloud.remove_radius_outlier( + nb_points=min_neighbors, radius=radius + ) + return PointCloud2(filtered_pcd, pc.frame_id, pc.ts) + + return filter_func + + +@dataclass +class Detection3DPC(Detection3D): + pointcloud: PointCloud2 + + @classmethod + def from_2d( + cls, + det: Detection2D, + world_pointcloud: PointCloud2, + camera_info: CameraInfo, + world_to_optical_transform: Transform, + # filters are to be adjusted based on the sensor noise characteristics if feeding + # sensor data directly + filters: list[Callable[[PointCloud2], PointCloud2]] = [ + # height_filter(0.1), + raycast(), + radius_outlier(), + statistical(), + ], + ) -> Optional["Detection3D"]: + """Create a Detection3D from a 2D detection by projecting world pointcloud. + + This method handles: + 1. Projecting world pointcloud to camera frame + 2. Filtering points within the 2D detection bounding box + 3. Cleaning up the pointcloud (height filter, outlier removal) + 4. Hidden point removal from camera perspective + + Args: + det: The 2D detection + world_pointcloud: Full pointcloud in world frame + camera_info: Camera calibration info + world_to_camerlka_transform: Transform from world to camera frame + filters: List of functions to apply to the pointcloud for filtering + Returns: + Detection3D with filtered pointcloud, or None if no valid points + """ + # Extract camera parameters + fx, fy = camera_info.K[0], camera_info.K[4] + cx, cy = camera_info.K[2], camera_info.K[5] + image_width = camera_info.width + image_height = camera_info.height + + camera_matrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) + + # Convert pointcloud to numpy array + world_points = world_pointcloud.as_numpy() + + # Project points to camera frame + points_homogeneous = np.hstack([world_points, np.ones((world_points.shape[0], 1))]) + extrinsics_matrix = world_to_optical_transform.to_matrix() + points_camera = (extrinsics_matrix @ points_homogeneous.T).T + + # Filter out points behind the camera + valid_mask = points_camera[:, 2] > 0 + points_camera = points_camera[valid_mask] + world_points = world_points[valid_mask] + + if len(world_points) == 0: + return None + + # Project to 2D + points_2d_homogeneous = (camera_matrix @ points_camera[:, :3].T).T + points_2d = points_2d_homogeneous[:, :2] / points_2d_homogeneous[:, 2:3] + + # Filter points within image bounds + in_image_mask = ( + (points_2d[:, 0] >= 0) + & (points_2d[:, 0] < image_width) + & (points_2d[:, 1] >= 0) + & (points_2d[:, 1] < image_height) + ) + points_2d = points_2d[in_image_mask] + world_points = world_points[in_image_mask] + + if len(world_points) == 0: + return None + + # Extract bbox from Detection2D + x_min, y_min, x_max, y_max = det.bbox + + # Find points within this detection box (with small margin) + margin = 5 # pixels + in_box_mask = ( + (points_2d[:, 0] >= x_min - margin) + & (points_2d[:, 0] <= x_max + margin) + & (points_2d[:, 1] >= y_min - margin) + & (points_2d[:, 1] <= y_max + margin) + ) + + detection_points = world_points[in_box_mask] + + if detection_points.shape[0] == 0: + # print(f"No points found in detection bbox after projection. {det.name}") + return None + + # Create initial pointcloud for this detection + initial_pc = PointCloud2.from_numpy( + detection_points, + frame_id=world_pointcloud.frame_id, + timestamp=world_pointcloud.ts, + ) + + # Apply filters - each filter needs all 4 arguments + detection_pc = initial_pc + for filter_func in filters: + result = filter_func(det, detection_pc, camera_info, world_to_optical_transform) + if result is None: + return None + detection_pc = result + + # Final check for empty pointcloud + if len(detection_pc.pointcloud.points) == 0: + return None + + # Create Detection3D with filtered pointcloud + return cls( + image=det.image, + bbox=det.bbox, + track_id=det.track_id, + class_id=det.class_id, + confidence=det.confidence, + name=det.name, + ts=det.ts, + pointcloud=detection_pc, + transform=world_to_optical_transform, + frame_id=world_pointcloud.frame_id, + ) + + +class ImageDetections3DPC(ImageDetections[Detection3DPC]): + """Specialized class for 3D detections in an image.""" + + def to_foxglove_scene_update(self) -> "SceneUpdate": + """Convert all detections to a Foxglove SceneUpdate message. + + Returns: + SceneUpdate containing SceneEntity objects for all detections + """ + + # Create SceneUpdate message with all detections + scene_update = SceneUpdate() + scene_update.deletions_length = 0 + scene_update.deletions = [] + scene_update.entities = [] + + # Process each detection + for i, detection in enumerate(self.detections): + entity = detection.to_foxglove_scene_entity(entity_id=f"detection_{detection.name}_{i}") + scene_update.entities.append(entity) + + scene_update.entities_length = len(scene_update.entities) + return scene_update diff --git a/dimos/perception/detection2d/type/test_detection3d.py b/dimos/perception/detection2d/type/test_detection3d.py index c8215b9601..5ccb95e904 100644 --- a/dimos/perception/detection2d/type/test_detection3d.py +++ b/dimos/perception/detection2d/type/test_detection3d.py @@ -16,9 +16,9 @@ import pytest -def test_oriented_bounding_box(detection3d): +def test_oriented_bounding_box(detection3dpc): """Test oriented bounding box calculation and values.""" - obb = detection3d.get_oriented_bounding_box() + obb = detection3dpc.get_oriented_bounding_box() assert obb is not None, "Oriented bounding box should not be None" # Verify OBB center values @@ -32,18 +32,18 @@ def test_oriented_bounding_box(detection3d): assert obb.extent[2] == pytest.approx(0.155, abs=0.1) -def test_bounding_box_dimensions(detection3d): +def test_bounding_box_dimensions(detection3dpc): """Test bounding box dimension calculation.""" - dims = detection3d.get_bounding_box_dimensions() + dims = detection3dpc.get_bounding_box_dimensions() assert len(dims) == 3, "Bounding box dimensions should have 3 values" assert dims[0] == pytest.approx(0.350, abs=0.1) assert dims[1] == pytest.approx(0.250, abs=0.1) assert dims[2] == pytest.approx(0.550, abs=0.1) -def test_axis_aligned_bounding_box(detection3d): +def test_axis_aligned_bounding_box(detection3dpc): """Test axis-aligned bounding box calculation.""" - aabb = detection3d.get_bounding_box() + aabb = detection3dpc.get_bounding_box() assert aabb is not None, "Axis-aligned bounding box should not be None" # Verify AABB min values @@ -57,12 +57,12 @@ def test_axis_aligned_bounding_box(detection3d): assert aabb.max_bound[2] == pytest.approx(0.475, abs=0.1) -def test_point_cloud_properties(detection3d): +def test_point_cloud_properties(detection3dpc): """Test point cloud data and boundaries.""" - pc_points = detection3d.pointcloud.points() + pc_points = detection3dpc.pointcloud.points() assert len(pc_points) in [69, 70] - assert detection3d.pointcloud.frame_id == "world", ( - f"Expected frame_id 'world', got '{detection3d.pointcloud.frame_id}'" + assert detection3dpc.pointcloud.frame_id == "world", ( + f"Expected frame_id 'world', got '{detection3dpc.pointcloud.frame_id}'" ) # Extract xyz coordinates from points @@ -86,9 +86,9 @@ def test_point_cloud_properties(detection3d): assert center[2] == pytest.approx(0.160, abs=0.1) -def test_foxglove_scene_entity_generation(detection3d): +def test_foxglove_scene_entity_generation(detection3dpc): """Test Foxglove scene entity creation and structure.""" - entity = detection3d.to_foxglove_scene_entity("test_entity_123") + entity = detection3dpc.to_foxglove_scene_entity("test_entity_123") # Verify entity metadata assert entity.id == "1", f"Expected entity ID '1', got '{entity.id}'" @@ -97,9 +97,9 @@ def test_foxglove_scene_entity_generation(detection3d): assert entity.texts_length == 1, f"Expected 1 text, got {entity.texts_length}" -def test_foxglove_cube_properties(detection3d): +def test_foxglove_cube_properties(detection3dpc): """Test Foxglove cube primitive properties.""" - entity = detection3d.to_foxglove_scene_entity("test_entity_123") + entity = detection3dpc.to_foxglove_scene_entity("test_entity_123") cube = entity.cubes[0] # Verify position @@ -119,9 +119,9 @@ def test_foxglove_cube_properties(detection3d): assert cube.color.a == pytest.approx(0.2, abs=0.1) -def test_foxglove_text_label(detection3d): +def test_foxglove_text_label(detection3dpc): """Test Foxglove text label properties.""" - entity = detection3d.to_foxglove_scene_entity("test_entity_123") + entity = detection3dpc.to_foxglove_scene_entity("test_entity_123") text = entity.texts[0] assert text.text == "1/suitcase (81%)", f"Expected text '1/suitcase (81%)', got '{text.text}'" @@ -131,11 +131,11 @@ def test_foxglove_text_label(detection3d): assert text.font_size == 20.0, f"Expected font size 20.0, got {text.font_size}" -def test_detection_pose(detection3d): +def test_detection_pose(detection3dpc): """Test detection pose and frame information.""" - assert detection3d.pose.x == pytest.approx(-3.327, abs=0.1) - assert detection3d.pose.y == pytest.approx(-0.202, abs=0.1) - assert detection3d.pose.z == pytest.approx(0.160, abs=0.1) - assert detection3d.pose.frame_id == "world", ( - f"Expected frame_id 'world', got '{detection3d.pose.frame_id}'" + assert detection3dpc.pose.x == pytest.approx(-3.327, abs=0.1) + assert detection3dpc.pose.y == pytest.approx(-0.202, abs=0.1) + assert detection3dpc.pose.z == pytest.approx(0.160, abs=0.1) + assert detection3dpc.pose.frame_id == "world", ( + f"Expected frame_id 'world', got '{detection3dpc.pose.frame_id}'" ) From ad7f73a037732a172e1f221291a745f555f523c4 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 8 Oct 2025 14:46:37 -0700 Subject: [PATCH 2/5] object3d new test --- dimos/perception/detection2d/module3D.py | 4 +- .../detection2d/type/test_detection3d.py | 132 ++-------------- .../detection2d/type/test_detection3dpc.py | 141 ++++++++++++++++++ 3 files changed, 153 insertions(+), 124 deletions(-) create mode 100644 dimos/perception/detection2d/type/test_detection3dpc.py diff --git a/dimos/perception/detection2d/module3D.py b/dimos/perception/detection2d/module3D.py index 21fc1ca7ef..ecfedb90ee 100644 --- a/dimos/perception/detection2d/module3D.py +++ b/dimos/perception/detection2d/module3D.py @@ -41,7 +41,7 @@ class Detection3DModule(Detection2DModule): detected_pointcloud_1: Out[PointCloud2] = None # type: ignore detected_pointcloud_2: Out[PointCloud2] = None # type: ignore - detection_3d_stream: Observable[ImageDetections3D] = None + detection_3d_stream: Observable[ImageDetections3DPC] = None def __init__(self, camera_info: CameraInfo, *args, **kwargs): super().__init__(*args, **kwargs) @@ -56,7 +56,6 @@ def process_frame( if not transform: return ImageDetections3D(detections.image, []) - print("3d projection", detections, pointcloud, transform) detection3d_list = [] for detection in detections: detection3d = Detection3DPC.from_2d( @@ -69,7 +68,6 @@ def process_frame( detection3d_list.append(detection3d) ret = ImageDetections3DPC(detections.image, detection3d_list) - print("3d projection finished", ret) return ret @rpc diff --git a/dimos/perception/detection2d/type/test_detection3d.py b/dimos/perception/detection2d/type/test_detection3d.py index 5ccb95e904..21cf3c3c27 100644 --- a/dimos/perception/detection2d/type/test_detection3d.py +++ b/dimos/perception/detection2d/type/test_detection3d.py @@ -11,131 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import numpy as np import pytest +from dimos.perception.detection2d.type.detection3d import Detection3D -def test_oriented_bounding_box(detection3dpc): - """Test oriented bounding box calculation and values.""" - obb = detection3dpc.get_oriented_bounding_box() - assert obb is not None, "Oriented bounding box should not be None" - - # Verify OBB center values - assert obb.center[0] == pytest.approx(-3.36002, abs=0.1) - assert obb.center[1] == pytest.approx(-0.196446, abs=0.1) - assert obb.center[2] == pytest.approx(0.220184, abs=0.1) - - # Verify OBB extent values - assert obb.extent[0] == pytest.approx(0.531275, abs=0.1) - assert obb.extent[1] == pytest.approx(0.461054, abs=0.1) - assert obb.extent[2] == pytest.approx(0.155, abs=0.1) - - -def test_bounding_box_dimensions(detection3dpc): - """Test bounding box dimension calculation.""" - dims = detection3dpc.get_bounding_box_dimensions() - assert len(dims) == 3, "Bounding box dimensions should have 3 values" - assert dims[0] == pytest.approx(0.350, abs=0.1) - assert dims[1] == pytest.approx(0.250, abs=0.1) - assert dims[2] == pytest.approx(0.550, abs=0.1) - - -def test_axis_aligned_bounding_box(detection3dpc): - """Test axis-aligned bounding box calculation.""" - aabb = detection3dpc.get_bounding_box() - assert aabb is not None, "Axis-aligned bounding box should not be None" - - # Verify AABB min values - assert aabb.min_bound[0] == pytest.approx(-3.575, abs=0.1) - assert aabb.min_bound[1] == pytest.approx(-0.375, abs=0.1) - assert aabb.min_bound[2] == pytest.approx(-0.075, abs=0.1) - - # Verify AABB max values - assert aabb.max_bound[0] == pytest.approx(-3.075, abs=0.1) - assert aabb.max_bound[1] == pytest.approx(-0.125, abs=0.1) - assert aabb.max_bound[2] == pytest.approx(0.475, abs=0.1) - - -def test_point_cloud_properties(detection3dpc): - """Test point cloud data and boundaries.""" - pc_points = detection3dpc.pointcloud.points() - assert len(pc_points) in [69, 70] - assert detection3dpc.pointcloud.frame_id == "world", ( - f"Expected frame_id 'world', got '{detection3dpc.pointcloud.frame_id}'" - ) - - # Extract xyz coordinates from points - points = np.array([[pt[0], pt[1], pt[2]] for pt in pc_points]) - - min_pt = np.min(points, axis=0) - max_pt = np.max(points, axis=0) - center = np.mean(points, axis=0) - - # Verify point cloud boundaries - assert min_pt[0] == pytest.approx(-3.575, abs=0.1) - assert min_pt[1] == pytest.approx(-0.375, abs=0.1) - assert min_pt[2] == pytest.approx(-0.075, abs=0.1) - - assert max_pt[0] == pytest.approx(-3.075, abs=0.1) - assert max_pt[1] == pytest.approx(-0.125, abs=0.1) - assert max_pt[2] == pytest.approx(0.475, abs=0.1) - - assert center[0] == pytest.approx(-3.326, abs=0.1) - assert center[1] == pytest.approx(-0.202, abs=0.1) - assert center[2] == pytest.approx(0.160, abs=0.1) - - -def test_foxglove_scene_entity_generation(detection3dpc): - """Test Foxglove scene entity creation and structure.""" - entity = detection3dpc.to_foxglove_scene_entity("test_entity_123") - - # Verify entity metadata - assert entity.id == "1", f"Expected entity ID '1', got '{entity.id}'" - assert entity.frame_id == "world", f"Expected frame_id 'world', got '{entity.frame_id}'" - assert entity.cubes_length == 1, f"Expected 1 cube, got {entity.cubes_length}" - assert entity.texts_length == 1, f"Expected 1 text, got {entity.texts_length}" - - -def test_foxglove_cube_properties(detection3dpc): - """Test Foxglove cube primitive properties.""" - entity = detection3dpc.to_foxglove_scene_entity("test_entity_123") - cube = entity.cubes[0] - - # Verify position - assert cube.pose.position.x == pytest.approx(-3.325, abs=0.1) - assert cube.pose.position.y == pytest.approx(-0.250, abs=0.1) - assert cube.pose.position.z == pytest.approx(0.200, abs=0.1) - - # Verify size - assert cube.size.x == pytest.approx(0.350, abs=0.1) - assert cube.size.y == pytest.approx(0.250, abs=0.1) - assert cube.size.z == pytest.approx(0.550, abs=0.1) - - # Verify color (green with alpha) - assert cube.color.r == pytest.approx(0.08235294117647059, abs=0.1) - assert cube.color.g == pytest.approx(0.7176470588235294, abs=0.1) - assert cube.color.b == pytest.approx(0.28627450980392155, abs=0.1) - assert cube.color.a == pytest.approx(0.2, abs=0.1) - - -def test_foxglove_text_label(detection3dpc): - """Test Foxglove text label properties.""" - entity = detection3dpc.to_foxglove_scene_entity("test_entity_123") - text = entity.texts[0] - assert text.text == "1/suitcase (81%)", f"Expected text '1/suitcase (81%)', got '{text.text}'" - assert text.pose.position.x == pytest.approx(-3.325, abs=0.1) - assert text.pose.position.y == pytest.approx(-0.250, abs=0.1) - assert text.pose.position.z == pytest.approx(0.575, abs=0.1) - assert text.font_size == 20.0, f"Expected font size 20.0, got {text.font_size}" +def test_guess_projection(get_moment_2d): + moment = get_moment_2d(seek=10.0) + for key, value in moment.items(): + print(key, "====================================") + print(value) + camera_info = moment.get("camera_info") + detection2d = moment.get("detections2d")[0] + tf = moment.get("tf") + transform = tf.get("camera_optical", "world", detection2d.ts, 5.0) -def test_detection_pose(detection3dpc): - """Test detection pose and frame information.""" - assert detection3dpc.pose.x == pytest.approx(-3.327, abs=0.1) - assert detection3dpc.pose.y == pytest.approx(-0.202, abs=0.1) - assert detection3dpc.pose.z == pytest.approx(0.160, abs=0.1) - assert detection3dpc.pose.frame_id == "world", ( - f"Expected frame_id 'world', got '{detection3dpc.pose.frame_id}'" - ) + detection3d = Detection3D.from_2d(detection2d, 1.5, camera_info, transform) diff --git a/dimos/perception/detection2d/type/test_detection3dpc.py b/dimos/perception/detection2d/type/test_detection3dpc.py new file mode 100644 index 0000000000..5ccb95e904 --- /dev/null +++ b/dimos/perception/detection2d/type/test_detection3dpc.py @@ -0,0 +1,141 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest + + +def test_oriented_bounding_box(detection3dpc): + """Test oriented bounding box calculation and values.""" + obb = detection3dpc.get_oriented_bounding_box() + assert obb is not None, "Oriented bounding box should not be None" + + # Verify OBB center values + assert obb.center[0] == pytest.approx(-3.36002, abs=0.1) + assert obb.center[1] == pytest.approx(-0.196446, abs=0.1) + assert obb.center[2] == pytest.approx(0.220184, abs=0.1) + + # Verify OBB extent values + assert obb.extent[0] == pytest.approx(0.531275, abs=0.1) + assert obb.extent[1] == pytest.approx(0.461054, abs=0.1) + assert obb.extent[2] == pytest.approx(0.155, abs=0.1) + + +def test_bounding_box_dimensions(detection3dpc): + """Test bounding box dimension calculation.""" + dims = detection3dpc.get_bounding_box_dimensions() + assert len(dims) == 3, "Bounding box dimensions should have 3 values" + assert dims[0] == pytest.approx(0.350, abs=0.1) + assert dims[1] == pytest.approx(0.250, abs=0.1) + assert dims[2] == pytest.approx(0.550, abs=0.1) + + +def test_axis_aligned_bounding_box(detection3dpc): + """Test axis-aligned bounding box calculation.""" + aabb = detection3dpc.get_bounding_box() + assert aabb is not None, "Axis-aligned bounding box should not be None" + + # Verify AABB min values + assert aabb.min_bound[0] == pytest.approx(-3.575, abs=0.1) + assert aabb.min_bound[1] == pytest.approx(-0.375, abs=0.1) + assert aabb.min_bound[2] == pytest.approx(-0.075, abs=0.1) + + # Verify AABB max values + assert aabb.max_bound[0] == pytest.approx(-3.075, abs=0.1) + assert aabb.max_bound[1] == pytest.approx(-0.125, abs=0.1) + assert aabb.max_bound[2] == pytest.approx(0.475, abs=0.1) + + +def test_point_cloud_properties(detection3dpc): + """Test point cloud data and boundaries.""" + pc_points = detection3dpc.pointcloud.points() + assert len(pc_points) in [69, 70] + assert detection3dpc.pointcloud.frame_id == "world", ( + f"Expected frame_id 'world', got '{detection3dpc.pointcloud.frame_id}'" + ) + + # Extract xyz coordinates from points + points = np.array([[pt[0], pt[1], pt[2]] for pt in pc_points]) + + min_pt = np.min(points, axis=0) + max_pt = np.max(points, axis=0) + center = np.mean(points, axis=0) + + # Verify point cloud boundaries + assert min_pt[0] == pytest.approx(-3.575, abs=0.1) + assert min_pt[1] == pytest.approx(-0.375, abs=0.1) + assert min_pt[2] == pytest.approx(-0.075, abs=0.1) + + assert max_pt[0] == pytest.approx(-3.075, abs=0.1) + assert max_pt[1] == pytest.approx(-0.125, abs=0.1) + assert max_pt[2] == pytest.approx(0.475, abs=0.1) + + assert center[0] == pytest.approx(-3.326, abs=0.1) + assert center[1] == pytest.approx(-0.202, abs=0.1) + assert center[2] == pytest.approx(0.160, abs=0.1) + + +def test_foxglove_scene_entity_generation(detection3dpc): + """Test Foxglove scene entity creation and structure.""" + entity = detection3dpc.to_foxglove_scene_entity("test_entity_123") + + # Verify entity metadata + assert entity.id == "1", f"Expected entity ID '1', got '{entity.id}'" + assert entity.frame_id == "world", f"Expected frame_id 'world', got '{entity.frame_id}'" + assert entity.cubes_length == 1, f"Expected 1 cube, got {entity.cubes_length}" + assert entity.texts_length == 1, f"Expected 1 text, got {entity.texts_length}" + + +def test_foxglove_cube_properties(detection3dpc): + """Test Foxglove cube primitive properties.""" + entity = detection3dpc.to_foxglove_scene_entity("test_entity_123") + cube = entity.cubes[0] + + # Verify position + assert cube.pose.position.x == pytest.approx(-3.325, abs=0.1) + assert cube.pose.position.y == pytest.approx(-0.250, abs=0.1) + assert cube.pose.position.z == pytest.approx(0.200, abs=0.1) + + # Verify size + assert cube.size.x == pytest.approx(0.350, abs=0.1) + assert cube.size.y == pytest.approx(0.250, abs=0.1) + assert cube.size.z == pytest.approx(0.550, abs=0.1) + + # Verify color (green with alpha) + assert cube.color.r == pytest.approx(0.08235294117647059, abs=0.1) + assert cube.color.g == pytest.approx(0.7176470588235294, abs=0.1) + assert cube.color.b == pytest.approx(0.28627450980392155, abs=0.1) + assert cube.color.a == pytest.approx(0.2, abs=0.1) + + +def test_foxglove_text_label(detection3dpc): + """Test Foxglove text label properties.""" + entity = detection3dpc.to_foxglove_scene_entity("test_entity_123") + text = entity.texts[0] + + assert text.text == "1/suitcase (81%)", f"Expected text '1/suitcase (81%)', got '{text.text}'" + assert text.pose.position.x == pytest.approx(-3.325, abs=0.1) + assert text.pose.position.y == pytest.approx(-0.250, abs=0.1) + assert text.pose.position.z == pytest.approx(0.575, abs=0.1) + assert text.font_size == 20.0, f"Expected font size 20.0, got {text.font_size}" + + +def test_detection_pose(detection3dpc): + """Test detection pose and frame information.""" + assert detection3dpc.pose.x == pytest.approx(-3.327, abs=0.1) + assert detection3dpc.pose.y == pytest.approx(-0.202, abs=0.1) + assert detection3dpc.pose.z == pytest.approx(0.160, abs=0.1) + assert detection3dpc.pose.frame_id == "world", ( + f"Expected frame_id 'world', got '{detection3dpc.pose.frame_id}'" + ) From 318721f085545648f324fd54777281bd3f50977b Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 8 Oct 2025 15:08:08 -0700 Subject: [PATCH 3/5] lcm replay test --- dimos/perception/detection2d/conftest.py | 40 ++++++++++++++++++- .../detection2d/type/test_detection3d.py | 19 ++++++--- 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/dimos/perception/detection2d/conftest.py b/dimos/perception/detection2d/conftest.py index abb7f01174..2ecc60ce7b 100644 --- a/dimos/perception/detection2d/conftest.py +++ b/dimos/perception/detection2d/conftest.py @@ -19,9 +19,10 @@ from dimos_lcm.foxglove_msgs.SceneUpdate import SceneUpdate from dimos_lcm.visualization_msgs.MarkerArray import MarkerArray +from dimos.core import LCMTransport from dimos.msgs.geometry_msgs import Transform -from dimos.msgs.sensor_msgs import CameraInfo -from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.sensor_msgs import CameraInfo, Image, PointCloud2 +from dimos.msgs.vision_msgs import Detection2DArray from dimos.perception.detection2d.module2D import Detection2DModule from dimos.perception.detection2d.module3D import Detection3DModule from dimos.perception.detection2d.moduleDB import ObjectDBModule @@ -104,6 +105,41 @@ def moment_provider(**kwargs) -> Moment: return moment_provider +@pytest.fixture +def publish_moment(): + def publisher(moment: Moment | Moment2D | Moment3D): + if moment.get("detections2d"): + # 2d annotations + annotations = LCMTransport("/annotations", ImageAnnotations) + annotations.publish(moment.get("detections2d").to_foxglove_annotations()) + + detections = LCMTransport("/detections", Detection2DArray) + detections.publish(moment.get("detections2d").to_ros_detection2d_array()) + + if moment.get("detections3dpc"): + scene_update = LCMTransport("/scene_update", SceneUpdate) + # 3d scene update + scene_update.publish(moment.get("detections3dpc").to_foxglove_scene_update()) + + lidar = LCMTransport("/lidar", PointCloud2) + lidar.publish(moment.get("lidar_frame")) + + image = LCMTransport("/image", Image) + image.publish(moment.get("image_frame")) + camera_info = LCMTransport("/camera_info", CameraInfo) + camera_info.publish(moment.get("camera_info")) + + tf = moment.get("tf") + tf.start() + tf.publish(*moment.get("transforms")) + tf.stop() + + # moduleDB.scene_update.transport = LCMTransport("/scene_update", SceneUpdate) + # moduleDB.target.transport = LCMTransport("/target", PoseStamped) + + return publisher + + @pytest.fixture def detection2d(get_moment_2d) -> Detection2D: moment = get_moment_2d(seek=10.0) diff --git a/dimos/perception/detection2d/type/test_detection3d.py b/dimos/perception/detection2d/type/test_detection3d.py index 21cf3c3c27..e59930120c 100644 --- a/dimos/perception/detection2d/type/test_detection3d.py +++ b/dimos/perception/detection2d/type/test_detection3d.py @@ -11,14 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import pytest + +import time from dimos.perception.detection2d.type.detection3d import Detection3D -def test_guess_projection(get_moment_2d): - moment = get_moment_2d(seek=10.0) +def test_guess_projection(get_moment_3d, publish_moment): + moment = get_moment_3d() for key, value in moment.items(): print(key, "====================================") print(value) @@ -28,4 +28,13 @@ def test_guess_projection(get_moment_2d): tf = moment.get("tf") transform = tf.get("camera_optical", "world", detection2d.ts, 5.0) - detection3d = Detection3D.from_2d(detection2d, 1.5, camera_info, transform) + # for stash + # detection3d = Detection3D.from_2d(detection2d, 1.5, camera_info, transform) + # print(detection3d) + + # foxglove bridge needs 2 messages per topic to pass to foxglove + publish_moment(moment) + time.sleep(0.1) + publish_moment(moment) + time.sleep(0.1) + publish_moment(moment) From 6e344116933da5ba5440e1091b73bc409413bf48 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 8 Oct 2025 15:10:08 -0700 Subject: [PATCH 4/5] thread cleanup --- dimos/perception/detection2d/conftest.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dimos/perception/detection2d/conftest.py b/dimos/perception/detection2d/conftest.py index 2ecc60ce7b..9d85e05e97 100644 --- a/dimos/perception/detection2d/conftest.py +++ b/dimos/perception/detection2d/conftest.py @@ -116,23 +116,29 @@ def publisher(moment: Moment | Moment2D | Moment3D): detections = LCMTransport("/detections", Detection2DArray) detections.publish(moment.get("detections2d").to_ros_detection2d_array()) + annotations.lcm.stop() + detections.lcm.stop() + if moment.get("detections3dpc"): scene_update = LCMTransport("/scene_update", SceneUpdate) # 3d scene update scene_update.publish(moment.get("detections3dpc").to_foxglove_scene_update()) + scene_update.lcm.stop() lidar = LCMTransport("/lidar", PointCloud2) lidar.publish(moment.get("lidar_frame")) + lidar.lcm.stop() image = LCMTransport("/image", Image) image.publish(moment.get("image_frame")) + image.lcm.stop() + camera_info = LCMTransport("/camera_info", CameraInfo) camera_info.publish(moment.get("camera_info")) + camera_info.lcm.stop() tf = moment.get("tf") - tf.start() tf.publish(*moment.get("transforms")) - tf.stop() # moduleDB.scene_update.transport = LCMTransport("/scene_update", SceneUpdate) # moduleDB.target.transport = LCMTransport("/target", PoseStamped) From 7ce9a62156752557d18c0096cff05f4d9efc4ee9 Mon Sep 17 00:00:00 2001 From: lesh Date: Wed, 8 Oct 2025 15:11:13 -0700 Subject: [PATCH 5/5] added seek for example --- dimos/perception/detection2d/type/test_detection3d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/perception/detection2d/type/test_detection3d.py b/dimos/perception/detection2d/type/test_detection3d.py index e59930120c..a9c29b98bf 100644 --- a/dimos/perception/detection2d/type/test_detection3d.py +++ b/dimos/perception/detection2d/type/test_detection3d.py @@ -18,7 +18,7 @@ def test_guess_projection(get_moment_3d, publish_moment): - moment = get_moment_3d() + moment = get_moment_3d(seek=10.0) for key, value in moment.items(): print(key, "====================================") print(value)