diff --git a/.envrc b/.envrc deleted file mode 100644 index e22018404a..0000000000 --- a/.envrc +++ /dev/null @@ -1,4 +0,0 @@ -if ! has nix_direnv_version || ! nix_direnv_version 3.0.6; then - source_url "https://raw.githubusercontent.com/nix-community/nix-direnv/3.0.6/direnvrc" "sha256-RYcUJaRMf8oF5LznDrlCXbkOQrywm0HDv1VjYGaJGdM=" -fi -use flake . diff --git a/.envrc b/.envrc new file mode 120000 index 0000000000..6da2c886b2 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +.envrc.nix \ No newline at end of file diff --git a/.gitignore b/.gitignore index adc50a7ef6..12cb51509a 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,6 @@ FastSAM-x.pt yolo11n.pt /thread_monitor_report.csv + +# symlink one of .envrc.* if you'd like to use +.envrc diff --git a/assets/foxglove_unitree_yolo.json b/assets/foxglove_unitree_yolo.json index e8156a2a05..ab53e4a71e 100644 --- a/assets/foxglove_unitree_yolo.json +++ b/assets/foxglove_unitree_yolo.json @@ -25,14 +25,13 @@ "color": "#248eff57" }, "ff758451-8c06-4419-a995-e93c825eb8be": { - "visible": true, + "visible": false, "frameLocked": true, "label": "Grid", "instanceId": "ff758451-8c06-4419-a995-e93c825eb8be", "layerId": "foxglove.Grid", "frameId": "base_link", - "size": 3, - "divisions": 3, + "divisions": 6, "lineWidth": 1.5, "color": "#24fff4ff", "position": [ @@ -45,18 +44,19 @@ 0, 0 ], - "order": 2 + "order": 2, + "size": 6 } }, "cameraState": { "perspective": true, - "distance": 72.10076346479195, - "phi": 58.649061347608615, - "thetaOffset": 141.2348270943936, + "distance": 13.268408624096915, + "phi": 26.658696672199024, + "thetaOffset": 99.69918626426482, "targetOffset": [ - 1.4329507220695694, - -1.682769241958899, - 0.3118341583211758 + 1.740213570345715, + 0.7318803628974015, + -1.5060700211358968 ], "target": [ 0, @@ -89,6 +89,15 @@ "transforms": { "frame:camera_link": { "visible": false + }, + "frame:sensor": { + "visible": false + }, + "frame:sensor_at_scan": { + "visible": false + }, + "frame:map": { + "visible": true 
} }, "topics": { @@ -99,28 +108,27 @@ "colorMode": "colormap", "colorMap": "turbo", "pointShape": "circle", - "pointSize": 10, - "explicitAlpha": 1, + "pointSize": 2, + "explicitAlpha": 0.8, "decayTime": 0, - "cubeSize": 0.1, - "minValue": -0.3, - "cubeOutline": false + "cubeSize": 0.05, + "cubeOutline": false, + "minValue": -2 }, "/odom": { - "visible": false, + "visible": true, "axisScale": 1 }, "/video": { "visible": false }, "/global_map": { - "visible": false, + "visible": true, "colorField": "z", "colorMode": "colormap", "colorMap": "turbo", - "pointSize": 10, "decayTime": 0, - "pointShape": "cube", + "pointShape": "square", "cubeOutline": false, "cubeSize": 0.08, "gradient": [ @@ -128,8 +136,9 @@ "#d1e2e2ff" ], "stixelsEnabled": false, - "explicitAlpha": 1, - "minValue": -0.2 + "explicitAlpha": 0.339, + "minValue": -0.2, + "pointSize": 5 }, "/global_path": { "visible": true, @@ -152,7 +161,7 @@ "visible": false }, "/global_costmap": { - "visible": true, + "visible": false, "maxColor": "#6b2b2bff", "frameLocked": false, "unknownColor": "#80808000", @@ -199,6 +208,197 @@ }, "/navigation_goal": { "visible": true + }, + "/debug_camera_optical_points": { + "stixelsEnabled": false, + "visible": false, + "pointSize": 0.07, + "pointShape": "cube", + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo" + }, + "/debug_world_points": { + "visible": false, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "rainbow", + "pointShape": "cube" + }, + "/filtered_points_suitcase_0": { + "visible": false, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "flatColor": "#ff0808ff", + "cubeSize": 0.149, + "pointSize": 28.57 + }, + "/filtered_points_combined": { + "visible": true, + "flatColor": "#ff0000ff", + "pointShape": "cube", + "pointSize": 6.63, + "colorField": "z", + "colorMode": "gradient", + "colorMap": "rainbow", + "cubeSize": 0.35, + "gradient": [ + "#d100caff", + "#ff0000ff" + ] + }, + 
"/filtered_points_box_7": { + "visible": true, + "flatColor": "#fbfaffff", + "colorField": "intensity", + "colorMode": "colormap", + "colorMap": "turbo" + }, + "/filtered_pointcloud": { + "visible": true, + "colorField": "z", + "colorMode": "flat", + "colorMap": "turbo", + "flatColor": "#ff0000ff", + "pointSize": 40.21, + "pointShape": "cube", + "cubeSize": 0.1, + "cubeOutline": true + }, + "/detected": { + "visible": false, + "pointSize": 1.5, + "pointShape": "cube", + "cubeSize": 0.118, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "flatColor": "#d70000ff", + "cubeOutline": true + }, + "/detected_0": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointSize": 1.6, + "pointShape": "cube", + "cubeSize": 0.1, + "flatColor": "#e00000ff", + "stixelsEnabled": false, + "decayTime": 0, + "cubeOutline": true + }, + "/detected_1": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "cubeSize": 0.1, + "flatColor": "#00ff15ff", + "cubeOutline": true + }, + "/image_detected_0": { + "visible": false + }, + "/detected/pointcloud/1": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "flatColor": "#15ff00ff", + "pointSize": 0.1, + "cubeSize": 0.05, + "cubeOutline": true + }, + "/detected/pointcloud/2": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "flatColor": "#00ffe1ff", + "pointSize": 10, + "cubeOutline": true, + "cubeSize": 0.05 + }, + "/detected/pointcloud/0": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "flatColor": "#ff0000ff", + "cubeOutline": true, + "cubeSize": 0.04 + }, + "/detected/image/0": { + "visible": false + }, + "/detected/image/3": { + "visible": false + }, + "/detected/pointcloud/3": { + 
"visible": true, + "pointSize": 1.5, + "pointShape": "cube", + "cubeSize": 0.1, + "flatColor": "#00fffaff", + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo" + }, + "/detected/image/1": { + "visible": false + }, + "/registered_scan": { + "visible": true, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo", + "pointShape": "circle", + "pointSize": 2 + }, + "/image/camera_info": { + "visible": true, + "distance": 2 + }, + "/map": { + "visible": true, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo", + "pointShape": "square", + "cubeSize": 0.13, + "explicitAlpha": 1, + "pointSize": 1, + "decayTime": 2 + }, + "/detection3d/markers": { + "visible": true, + "color": "#88ff00ff", + "showOutlines": true, + "selectedIdVariable": "" + }, + "/foxglove/scene_update": { + "visible": true + }, + "/scene_update": { + "visible": true, + "showOutlines": true, + "computeVertexNormals": true + }, + "/target": { + "visible": true, + "axisScale": 1 + }, + "/goal_pose": { + "visible": true, + "axisScale": 0.5 } }, "publish": { @@ -211,8 +411,8 @@ "poseEstimateThetaDeviation": 0.26179939 }, "imageMode": {}, - "foxglovePanelTitle": "test", - "followTf": "world" + "foxglovePanelTitle": "", + "followTf": "map" }, "Image!3mnp456": { "cameraState": { @@ -245,7 +445,7 @@ "enableStats": false, "transforms": { "showLabel": false, - "visible": false + "visible": true } }, "transforms": { @@ -264,13 +464,14 @@ }, "topics": { "/lidar": { - "visible": true, - "colorField": "_auto_distance", + "visible": false, + "colorField": "z", "colorMode": "colormap", "colorMap": "turbo", - "pointSize": 4, + "pointSize": 6, "explicitAlpha": 0.6, - "pointShape": "square" + "pointShape": "circle", + "cubeSize": 0.016 }, "/odom": { "visible": false @@ -281,6 +482,103 @@ "/global_costmap": { "visible": false, "minColor": "#ffffff00" + }, + "/detected_0": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + 
"pointSize": 23, + "pointShape": "cube", + "cubeSize": 0.04, + "flatColor": "#ff0000ff", + "stixelsEnabled": false + }, + "/detected_1": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointSize": 20.51, + "flatColor": "#34ff00ff", + "pointShape": "cube", + "cubeSize": 0.04, + "cubeOutline": false + }, + "/filtered_pointcloud": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "rainbow", + "pointSize": 1.5, + "pointShape": "cube", + "flatColor": "#ff0000ff", + "cubeSize": 0.1 + }, + "/global_map": { + "visible": false, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo", + "pointShape": "cube", + "pointSize": 5 + }, + "/detected/pointcloud/1": { + "visible": false, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "cubeSize": 0.01, + "flatColor": "#00ff1eff", + "pointSize": 15, + "decayTime": 0, + "cubeOutline": true + }, + "/detected/pointcloud/2": { + "visible": false, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "circle", + "cubeSize": 0.1, + "flatColor": "#00fbffff", + "pointSize": 0.01 + }, + "/detected/pointcloud/0": { + "visible": false, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "flatColor": "#ff0000ff", + "pointSize": 15, + "cubeOutline": true, + "cubeSize": 0.03 + }, + "/registered_scan": { + "visible": false, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo", + "pointShape": "circle", + "pointSize": 6.49 + }, + "/detection3d/markers": { + "visible": false + }, + "/foxglove/scene_update": { + "visible": true + }, + "/scene_update": { + "visible": false + }, + "/map": { + "visible": false, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo", + "pointSize": 8 } }, "layers": {}, @@ -305,40 +603,44 @@ } }, "synchronize": false, - "rotation": 0 + "rotation": 0, + 
"calibrationTopic": "/camera_info" }, "foxglovePanelTitle": "" }, - "StateTransitions!pu21x4": { + "Plot!3heo336": { "paths": [ { - "value": "/annotations.texts[1].text", - "timestampMethod": "receiveTime", - "label": "detection1" + "timestampMethod": "publishTime", + "value": "/image.header.stamp.sec", + "enabled": true, + "color": "#4e98e2", + "label": "image", + "showLine": false }, { - "value": "/annotations.texts[3].text", - "timestampMethod": "receiveTime", - "label": "detection2" - } - ], - "isSynced": true, - "showPoints": true, - "timeWindowMode": "automatic" - }, - "Plot!a1gj37": { - "paths": [ + "timestampMethod": "publishTime", + "value": "/map.header.stamp.sec", + "enabled": true, + "color": "#f5774d", + "label": "lidar", + "showLine": false + }, { - "timestampMethod": "receiveTime", - "value": "/annotations.points[0].points[0].x", + "timestampMethod": "publishTime", + "value": "/tf.transforms[0].header.stamp.sec", "enabled": true, - "color": "#4e98e2" + "color": "#f7df71", + "label": "tf", + "showLine": false }, { - "timestampMethod": "receiveTime", - "value": "/annotations.points[0].points[0].y", + "timestampMethod": "publishTime", + "value": "/odom.header.stamp.sec", "enabled": true, - "color": "#f5774d" + "color": "#5cd6a9", + "label": "odom", + "showLine": false } ], "showXAxisLabels": true, @@ -349,6 +651,161 @@ "isSynced": true, "xAxisVal": "timestamp", "sidebarDimension": 240 + }, + "Image!47pi3ov": { + "cameraState": { + "distance": 20, + "perspective": true, + "phi": 60, + "target": [ + 0, + 0, + 0 + ], + "targetOffset": [ + 0, + 0, + 0 + ], + "targetOrientation": [ + 0, + 0, + 0, + 1 + ], + "thetaOffset": 45, + "fovy": 45, + "near": 0.5, + "far": 5000 + }, + "followMode": "follow-pose", + "scene": {}, + "transforms": {}, + "topics": {}, + "layers": {}, + "publish": { + "type": "point", + "poseTopic": "/move_base_simple/goal", + "pointTopic": "/clicked_point", + "poseEstimateTopic": "/initialpose", + "poseEstimateXDeviation": 0.5, + 
"poseEstimateYDeviation": 0.5, + "poseEstimateThetaDeviation": 0.26179939 + }, + "imageMode": { + "imageTopic": "/detected/image/0" + } + }, + "Image!4kk50gw": { + "cameraState": { + "distance": 20, + "perspective": true, + "phi": 60, + "target": [ + 0, + 0, + 0 + ], + "targetOffset": [ + 0, + 0, + 0 + ], + "targetOrientation": [ + 0, + 0, + 0, + 1 + ], + "thetaOffset": 45, + "fovy": 45, + "near": 0.5, + "far": 5000 + }, + "followMode": "follow-pose", + "scene": {}, + "transforms": {}, + "topics": {}, + "layers": {}, + "publish": { + "type": "point", + "poseTopic": "/move_base_simple/goal", + "pointTopic": "/clicked_point", + "poseEstimateTopic": "/initialpose", + "poseEstimateXDeviation": 0.5, + "poseEstimateYDeviation": 0.5, + "poseEstimateThetaDeviation": 0.26179939 + }, + "imageMode": { + "imageTopic": "/detected/image/1" + } + }, + "Image!2348e0b": { + "cameraState": { + "distance": 20, + "perspective": true, + "phi": 60, + "target": [ + 0, + 0, + 0 + ], + "targetOffset": [ + 0, + 0, + 0 + ], + "targetOrientation": [ + 0, + 0, + 0, + 1 + ], + "thetaOffset": 45, + "fovy": 45, + "near": 0.5, + "far": 5000 + }, + "followMode": "follow-pose", + "scene": {}, + "transforms": {}, + "topics": {}, + "layers": {}, + "publish": { + "type": "point", + "poseTopic": "/move_base_simple/goal", + "pointTopic": "/clicked_point", + "poseEstimateTopic": "/initialpose", + "poseEstimateXDeviation": 0.5, + "poseEstimateYDeviation": 0.5, + "poseEstimateThetaDeviation": 0.26179939 + }, + "imageMode": { + "imageTopic": "/detected/image/2", + "synchronize": false + } + }, + "StateTransitions!pu21x4": { + "paths": [ + { + "value": "/annotations.texts[1].text", + "timestampMethod": "receiveTime", + "label": "detection1" + }, + { + "value": "/annotations.texts[3].text", + "timestampMethod": "receiveTime", + "label": "detection2" + }, + { + "value": "/annotations.texts[5].text", + "timestampMethod": "receiveTime", + "label": "detection3" + } + ], + "isSynced": true, + "showPoints": true, + 
"timeWindowMode": "automatic" } }, "globalVariables": {}, @@ -360,19 +817,33 @@ "tracks": [] }, "layout": { - "first": "3D!18i6zy7", + "first": { + "first": "3D!18i6zy7", + "second": "Image!3mnp456", + "direction": "row", + "splitPercentage": 47.265625 + }, "second": { - "first": "Image!3mnp456", + "first": "Plot!3heo336", "second": { - "first": "StateTransitions!pu21x4", - "second": "Plot!a1gj37", + "first": { + "first": "Image!47pi3ov", + "second": { + "first": "Image!4kk50gw", + "second": "Image!2348e0b", + "direction": "row" + }, + "direction": "row", + "splitPercentage": 33.06523681858802 + }, + "second": "StateTransitions!pu21x4", "direction": "column", - "splitPercentage": 32.661870503597136 + "splitPercentage": 86.63101604278076 }, - "direction": "column", - "splitPercentage": 59.451575262543756 + "direction": "row", + "splitPercentage": 46.39139486467731 }, - "direction": "row", - "splitPercentage": 55.3125 + "direction": "column", + "splitPercentage": 81.62970106075217 } } diff --git a/data/.lfs/replay_g1.tar.gz b/data/.lfs/replay_g1.tar.gz new file mode 100644 index 0000000000..67750bd0cf --- /dev/null +++ b/data/.lfs/replay_g1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19ad1c53c4f8f9414c0921b94cd4c87e81bf0ad676881339f15ae2d8a8619311 +size 557410250 diff --git a/data/.lfs/replay_g1_run.tar.gz b/data/.lfs/replay_g1_run.tar.gz new file mode 100644 index 0000000000..86368ec788 --- /dev/null +++ b/data/.lfs/replay_g1_run.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00cf21f65a15994895150f74044f5d00d7aa873d24f071d249ecbd09cb8f2b26 +size 559554274 diff --git a/dimos/agents/memory/spatial_vector_db.py b/dimos/agents/memory/spatial_vector_db.py index afc77df12a..a4eefb792b 100644 --- a/dimos/agents/memory/spatial_vector_db.py +++ b/dimos/agents/memory/spatial_vector_db.py @@ -200,6 +200,9 @@ def _process_query_results(self, results) -> List[Dict]: processed_results = [] for i, vector_id in 
enumerate(results["ids"]): + if isinstance(vector_id, list) and not vector_id: + continue + lookup_id = vector_id[0] if isinstance(vector_id, list) else vector_id # Create the result dictionary with metadata regardless of image availability diff --git a/dimos/agents2/skills/ros_navigation.py b/dimos/agents2/skills/ros_navigation.py new file mode 100644 index 0000000000..d989f95b5d --- /dev/null +++ b/dimos/agents2/skills/ros_navigation.py @@ -0,0 +1,123 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import time +from typing import Any, Optional +from dimos.msgs.geometry_msgs import PoseStamped +from dimos.msgs.geometry_msgs.Vector3 import make_vector3 +from dimos.protocol.skill.skill import SkillContainer, skill +from dimos.utils.logging_config import setup_logger +from dimos.utils.transform_utils import euler_to_quaternion + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from dimos.robot.unitree_webrtc.unitree_g1 import UnitreeG1 + +logger = setup_logger(__file__) + + +class RosNavigation(SkillContainer): + _robot: "UnitreeG1" + _started: bool + + def __init__(self, robot: "UnitreeG1"): + self._robot = robot + self._similarity_threshold = 0.23 + self._started = False + + def __enter__(self) -> "RosNavigation": + self._started = True + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._close_module() + return False + + @skill() + def navigate_with_text(self, query: str) -> str: + """Navigate to a location by querying the existing semantic map using natural language. + + CALL THIS SKILL FOR ONE SUBJECT AT A TIME. For example: "Go to the person wearing a blue shirt in the living room", + you should call this skill twice, once for the person wearing a blue shirt and once for the living room. + + Args: + query: Text query to search for in the semantic map + """ + + print("X" * 10000) + + if not self._started: + raise ValueError(f"{self} has not been started.") + + success_msg = self._navigate_using_semantic_map(query) + if success_msg: + return success_msg + + return "Failed to navigate." + + def _navigate_using_semantic_map(self, query: str) -> str: + results = self._robot.spatial_memory.query_by_text(query) + + if not results: + return f"No matching location found in semantic map for '{query}'" + + best_match = results[0] + + goal_pose = self._get_goal_pose_from_result(best_match) + + if not goal_pose: + return f"Found a result for '{query}' but it didn't have a valid position." 
+ + result = self._robot.nav.go_to(goal_pose) + + if not result: + return f"Failed to navigate for '{query}'" + + return f"Successfuly arrived at '{query}'" + + @skill() + def stop_movement(self) -> str: + """Immediatly stop moving.""" + + if not self._started: + raise ValueError(f"{self} has not been started.") + + self._robot.cancel_navigation() + + return "Stopped" + + def _get_goal_pose_from_result(self, result: dict[str, Any]) -> Optional[PoseStamped]: + similarity = 1.0 - (result.get("distance") or 1) + if similarity < self._similarity_threshold: + logger.warning( + f"Match found but similarity score ({similarity:.4f}) is below threshold ({self._similarity_threshold})" + ) + return None + + metadata = result.get("metadata") + if not metadata: + return None + + first = metadata[0] + pos_x = first.get("pos_x", 0) + pos_y = first.get("pos_y", 0) + theta = first.get("rot_z", 0) + + return PoseStamped( + ts=time.time(), + position=make_vector3(pos_x, pos_y, 0), + orientation=euler_to_quaternion(make_vector3(0, 0, theta)), + frame_id="map", + ) diff --git a/dimos/agents2/temp/webcam_agent.py b/dimos/agents2/temp/webcam_agent.py index 8e2538f832..9add760819 100644 --- a/dimos/agents2/temp/webcam_agent.py +++ b/dimos/agents2/temp/webcam_agent.py @@ -25,11 +25,15 @@ from pathlib import Path from dotenv import load_dotenv +from dimos.hardware.camera import zed +from dimos.msgs.geometry_msgs import Quaternion, Transform, Vector3 from dimos.agents2.cli.human import HumanInput # Add parent directories to path sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) +from dimos.hardware.camera.module import CameraModule +from dimos.hardware.camera.webcam import Webcam from threading import Thread @@ -39,8 +43,9 @@ from dimos.agents2 import Agent, Output, Reducer, Stream, skill from dimos.agents2.spec import Model, Provider from dimos.core import LCMTransport, Module, pLCMTransport, start -from dimos.hardware.webcam import ColorCameraModule, Webcam -from 
dimos.msgs.sensor_msgs import Image + +# from dimos.hardware.webcam import ColorCameraModule, Webcam +from dimos.msgs.sensor_msgs import Image, CameraInfo from dimos.protocol.skill.test_coordinator import SkillContainerTest from dimos.robot.unitree_webrtc.unitree_go2 import UnitreeGo2 from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer @@ -110,7 +115,24 @@ def main(): ) testcontainer = dimos.deploy(SkillContainerTest) - webcam = dimos.deploy(ColorCameraModule, hardware=lambda: Webcam(camera_index=0)) + webcam = dimos.deploy( + CameraModule, + transform=Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + frame_id="base_link", + child_frame_id="camera_link", + ), + hardware=lambda: Webcam( + camera_index=0, + frequency=15, + stereo_slice="left", + camera_info=zed.CameraInfo.SingleWebcam, + ), + ) + + webcam.camera_info.transport = LCMTransport("/camera_info", CameraInfo) + webcam.image.transport = LCMTransport("/image", Image) webcam.start() diff --git a/dimos/core/module.py b/dimos/core/module.py index f7786bd55e..255f8b5ceb 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -108,7 +108,8 @@ def _close_rpc(self): @property def tf(self): if self._tf is None: - self._tf = self.config.tf_transport() + # self._tf = self.config.tf_transport() + self._tf = LCMTF() return self._tf @tf.setter diff --git a/dimos/hardware/camera/module.py b/dimos/hardware/camera/module.py new file mode 100644 index 0000000000..2b2880b80a --- /dev/null +++ b/dimos/hardware/camera/module.py @@ -0,0 +1,127 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import queue +import time +from dataclasses import dataclass, field +from typing import Any, Callable, Generic, Literal, Optional, Protocol, TypeVar + +import reactivex as rx +from dimos_lcm.sensor_msgs import CameraInfo +from reactivex import operators as ops +from reactivex.disposable import Disposable +from reactivex.observable import Observable + +from dimos.agents2 import Output, Reducer, Stream, skill +from dimos.core import Module, Out, rpc +from dimos.core.module import Module, ModuleConfig +from dimos.hardware.camera.spec import ( + CameraHardware, +) +from dimos.hardware.camera.webcam import Webcam, WebcamConfig +from dimos.msgs.geometry_msgs import Quaternion, Transform, Vector3 +from dimos.msgs.sensor_msgs import Image +from dimos.msgs.sensor_msgs.Image import Image, sharpness_barrier + +default_transform = lambda: Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + frame_id="base_link", + child_frame_id="camera_link", +) + + +@dataclass +class CameraModuleConfig(ModuleConfig): + frame_id: str = "camera_link" + transform: Optional[Transform] = field(default_factory=default_transform) + hardware: Callable[[], CameraHardware] | CameraHardware = Webcam + + +class CameraModule(Module): + image: Out[Image] = None + camera_info: Out[CameraInfo] = None + + hardware: CameraHardware = None + _module_subscription: Optional[Disposable] = None + _camera_info_subscription: Optional[Disposable] = None + _skill_stream: Optional[Observable[Image]] = None + + default_config = CameraModuleConfig + + def __init__(self, 
*args, **kwargs): + super().__init__(*args, **kwargs) + + @rpc + def start(self): + if callable(self.config.hardware): + self.hardware = self.config.hardware() + else: + self.hardware = self.config.hardware + + if self._module_subscription: + return "already started" + + stream = self.hardware.image_stream().pipe(sharpness_barrier(5)) + + # camera_info_stream = self.camera_info_stream(frequency=5.0) + + def publish_info(camera_info: CameraInfo): + self.camera_info.publish(camera_info) + + if self.config.transform is None: + return + + camera_link = self.config.transform + camera_link.ts = camera_info.ts + camera_optical = Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=Quaternion(-0.5, 0.5, -0.5, 0.5), + frame_id="camera_link", + child_frame_id="camera_optical", + ts=camera_link.ts, + ) + + self.tf.publish(camera_link, camera_optical) + + self._camera_info_subscription = self.camera_info_stream().subscribe(publish_info) + self._module_subscription = stream.subscribe(self.image.publish) + + @skill(stream=Stream.passive, output=Output.image, reducer=Reducer.latest) + def video_stream(self) -> Image: + """implicit video stream skill""" + _queue = queue.Queue(maxsize=1) + self.hardware.image_stream().subscribe(_queue.put) + + for image in iter(_queue.get, None): + yield image + + def camera_info_stream(self, frequency: float = 5.0) -> Observable[CameraInfo]: + def camera_info(_) -> CameraInfo: + self.hardware.camera_info.ts = time.time() + return self.hardware.camera_info + + return rx.interval(1.0 / frequency).pipe(ops.map(camera_info)) + + def stop(self): + if self._module_subscription: + self._module_subscription.dispose() + self._module_subscription = None + if self._camera_info_subscription: + self._camera_info_subscription.dispose() + self._camera_info_subscription = None + # Also stop the hardware if it has a stop method + if self.hardware and hasattr(self.hardware, "stop"): + self.hardware.stop() + super().stop() diff --git 
a/dimos/hardware/camera/spec.py b/dimos/hardware/camera/spec.py new file mode 100644 index 0000000000..cc69db5d1c --- /dev/null +++ b/dimos/hardware/camera/spec.py @@ -0,0 +1,55 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC, abstractmethod, abstractproperty +from typing import Generic, Optional, Protocol, TypeVar + +from dimos_lcm.sensor_msgs import CameraInfo +from reactivex.observable import Observable + +from dimos.msgs.sensor_msgs import Image +from dimos.protocol.service import Configurable + + +class CameraConfig(Protocol): + frame_id_prefix: Optional[str] + + +CameraConfigT = TypeVar("CameraConfigT", bound=CameraConfig) + + +class CameraHardware(ABC, Configurable[CameraConfigT], Generic[CameraConfigT]): + @abstractmethod + def image_stream(self) -> Observable[Image]: + pass + + @abstractproperty + def camera_info(self) -> CameraInfo: + pass + + +# This is an example, feel free to change spec for stereo cameras +# e.g., separate camera_info or streams for left/right, etc. 
+class StereoCameraHardware(ABC, Configurable[CameraConfigT], Generic[CameraConfigT]): + @abstractmethod + def image_stream(self) -> Observable[Image]: + pass + + @abstractmethod + def depth_stream(self) -> Observable[Image]: + pass + + @abstractproperty + def camera_info(self) -> CameraInfo: + pass diff --git a/dimos/hardware/camera/test_webcam.py b/dimos/hardware/camera/test_webcam.py new file mode 100644 index 0000000000..0f6a509084 --- /dev/null +++ b/dimos/hardware/camera/test_webcam.py @@ -0,0 +1,108 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import time + +import pytest + +from dimos import core +from dimos.hardware.camera import zed +from dimos.hardware.camera.module import CameraModule +from dimos.hardware.camera.webcam import Webcam +from dimos.msgs.geometry_msgs import Quaternion, Transform, Vector3 +from dimos.msgs.sensor_msgs import CameraInfo, Image + + +@pytest.mark.tool +def test_streaming_single(): + dimos = core.start(1) + + camera = dimos.deploy( + CameraModule, + transform=Transform( + translation=Vector3(0.05, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + frame_id="sensor", + child_frame_id="camera_link", + ), + hardware=lambda: Webcam( + stereo_slice="left", + camera_index=0, + frequency=15, + camera_info=zed.CameraInfo.SingleWebcam, + ), + ) + + camera.image.transport = core.LCMTransport("/image1", Image) + camera.camera_info.transport = core.LCMTransport("/image1/camera_info", CameraInfo) + camera.start() + + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + camera.stop() + dimos.stop() + + +@pytest.mark.tool +def test_streaming_double(): + dimos = core.start(2) + + camera1 = dimos.deploy( + CameraModule, + transform=Transform( + translation=Vector3(0.05, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + frame_id="sensor", + child_frame_id="camera_link", + ), + hardware=lambda: Webcam( + stereo_slice="left", + camera_index=0, + frequency=15, + camera_info=zed.CameraInfo.SingleWebcam, + ), + ) + + camera2 = dimos.deploy( + CameraModule, + transform=Transform( + translation=Vector3(0.05, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + frame_id="sensor", + child_frame_id="camera_link", + ), + hardware=lambda: Webcam( + camera_index=4, + frequency=15, + stereo_slice="left", + camera_info=zed.CameraInfo.SingleWebcam, + ), + ) + + camera1.image.transport = core.LCMTransport("/image1", Image) + camera1.camera_info.transport = core.LCMTransport("/image1/camera_info", CameraInfo) + camera1.start() + camera2.image.transport = core.LCMTransport("/image2", 
Image) + camera2.camera_info.transport = core.LCMTransport("/image2/camera_info", CameraInfo) + camera2.start() + + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + camera1.stop() + camera2.stop() + dimos.stop() diff --git a/dimos/hardware/webcam.py b/dimos/hardware/camera/webcam.py similarity index 69% rename from dimos/hardware/webcam.py rename to dimos/hardware/camera/webcam.py index d7413ccb1b..87ba492d6e 100644 --- a/dimos/hardware/webcam.py +++ b/dimos/hardware/camera/webcam.py @@ -15,57 +15,36 @@ import queue import threading import time -from abc import ABC, abstractmethod, abstractproperty from dataclasses import dataclass, field from functools import cache from typing import Any, Callable, Generic, Literal, Optional, Protocol, TypeVar import cv2 -import numpy as np from dimos_lcm.sensor_msgs import CameraInfo from reactivex import create from reactivex.observable import Observable -from dimos.agents2 import Output, Reducer, Stream, skill -from dimos.core import Module, Out, rpc -from dimos.core.module import DaskModule, ModuleConfig +from dimos.hardware.camera.spec import ( + CameraConfig, + CameraHardware, +) from dimos.msgs.sensor_msgs import Image from dimos.msgs.sensor_msgs.Image import ImageFormat -from dimos.protocol.service import Configurable, Service from dimos.utils.reactive import backpressure -class CameraConfig(Protocol): - frame_id_prefix: Optional[str] - - -CameraConfigT = TypeVar("CameraConfigT", bound=CameraConfig) - - -# StereoCamera interface, for cameras that provide standard -# color, depth, pointcloud, and pose messages -class ColorCameraHardware(Configurable[CameraConfigT], Generic[CameraConfigT]): - @abstractmethod - def color_stream(self) -> Observable[Image]: - pass - - @abstractproperty - def camera_info(self) -> CameraInfo: - pass - - @dataclass class WebcamConfig(CameraConfig): - camera_index: int = 0 + camera_index: int = 0 # /dev/videoN frame_width: int = 640 frame_height: int = 480 - frequency: int = 10 + 
frequency: int = 15 camera_info: CameraInfo = field(default_factory=CameraInfo) frame_id_prefix: Optional[str] = None stereo_slice: Optional[Literal["left", "right"]] = None # For stereo cameras -class Webcam(ColorCameraHardware[WebcamConfig]): +class Webcam(CameraHardware[WebcamConfig]): default_config = WebcamConfig def __init__(self, *args, **kwargs): @@ -76,7 +55,7 @@ def __init__(self, *args, **kwargs): self._observer = None @cache - def color_stream(self) -> Observable[Image]: + def image_stream(self) -> Observable[Image]: """Create an observable that starts/stops camera on subscription""" def subscribe(observer, scheduler=None): @@ -117,7 +96,6 @@ def start(self): self._capture_thread = threading.Thread(target=self._capture_loop, daemon=True) self._capture_thread.start() - @rpc def stop(self): """Stop capturing frames""" # Signal thread to stop @@ -191,56 +169,6 @@ def _capture_loop(self): @property def camera_info(self) -> CameraInfo: - """Return the camera info from config""" return self.config.camera_info def emit(self, image: Image): ... 
- - def image_stream(self): - return self.image.observable() - - -@dataclass -class ColorCameraModuleConfig(ModuleConfig): - hardware: Callable[[], ColorCameraHardware] | ColorCameraHardware = Webcam - - -class ColorCameraModule(DaskModule): - image: Out[Image] = None - hardware: ColorCameraHardware = None - _module_subscription: Optional[Any] = None # Subscription disposable - _skill_stream: Optional[Observable[Image]] = None - default_config = ColorCameraModuleConfig - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - @rpc - def start(self): - if callable(self.config.hardware): - self.hardware = self.config.hardware() - else: - self.hardware = self.config.hardware - - if self._module_subscription: - return "already started" - stream = self.hardware.color_stream() - self._module_subscription = stream.subscribe(self.image.publish) - - @skill(stream=Stream.passive, output=Output.image, reducer=Reducer.latest) - def video_stream(self) -> Image: - """implicit video stream skill""" - _queue = queue.Queue(maxsize=1) - self.hardware.color_stream().subscribe(_queue.put) - - for image in iter(_queue.get, None): - yield image - - def stop(self): - if self._module_subscription: - self._module_subscription.dispose() - self._module_subscription = None - # Also stop the hardware if it has a stop method - if self.hardware and hasattr(self.hardware, "stop"): - self.hardware.stop() - super().stop() diff --git a/dimos/hardware/test_webcam.py b/dimos/hardware/test_webcam.py deleted file mode 100644 index e6d26d2b8d..0000000000 --- a/dimos/hardware/test_webcam.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time - -import pytest - -from dimos.core import LCMTransport, start -from dimos.hardware.webcam import ColorCameraModule, Webcam -from dimos.msgs.sensor_msgs import Image - - -@pytest.mark.tool -def test_basic(): - webcam = Webcam() - subscription = webcam.color_stream().subscribe( - on_next=lambda img: print(f"Got image: {img.width}x{img.height}"), - on_error=lambda e: print(f"Error: {e}"), - on_completed=lambda: print("Stream completed"), - ) - - # Keep the subscription alive for a few seconds - try: - time.sleep(3) - finally: - # Clean disposal - subscription.dispose() - print("Test completed") - - -@pytest.mark.tool -def test_module(): - dimos = start(1) - # Deploy ColorCameraModule, not Webcam directly - camera_module = dimos.deploy( - ColorCameraModule, - hardware=lambda: Webcam(camera_index=4, frequency=30, stereo_slice="left"), - ) - camera_module.image.transport = LCMTransport("/image", Image) - camera_module.start() - - test_transport = LCMTransport("/image", Image) - test_transport.subscribe(print) - - time.sleep(60) - - print("shutting down") - camera_module.stop() - time.sleep(1.0) - dimos.stop() - - -if __name__ == "__main__": - test_module() diff --git a/dimos/mapping/types.py b/dimos/mapping/types.py index 1ee2263e21..3ceb64c56b 100644 --- a/dimos/mapping/types.py +++ b/dimos/mapping/types.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ from dataclasses import dataclass from typing import Optional, TypeAlias diff --git a/dimos/models/vl/__init__.py b/dimos/models/vl/__init__.py new file mode 100644 index 0000000000..8cb0a7944b --- /dev/null +++ b/dimos/models/vl/__init__.py @@ -0,0 +1,2 @@ +from dimos.models.vl.base import VlModel +from dimos.models.vl.qwen import QwenVlModel diff --git a/dimos/models/vl/qwen.py b/dimos/models/vl/qwen.py index 4944f081ae..39853c6061 100644 --- a/dimos/models/vl/qwen.py +++ b/dimos/models/vl/qwen.py @@ -1,12 +1,13 @@ -import os import base64 import io +import os from typing import Optional -import numpy as np + from openai import OpenAI from PIL import Image from dimos.models.vl.base import VlModel +from dimos.msgs.sensor_msgs import Image class QwenVlModel(VlModel): @@ -27,12 +28,13 @@ def __init__(self, api_key: Optional[str] = None, model_name: str = "qwen2.5-vl- api_key=api_key, ) - def query(self, image: np.ndarray, query: str) -> str: - pil_image = Image.fromarray(image.astype("uint8")) - buffered = io.BytesIO() - pil_image.save(buffered, format="PNG") - img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8") + def query(self, image: Image, query: str) -> str: + # pil_image = Image.fromarray(image.astype("uint8")) + # buffered = io.BytesIO() + # pil_image.save(buffered, format="PNG") + # img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8") + img_base64 = image.to_base64() response = self._client.chat.completions.create( model=self._model_name, messages=[ diff --git a/dimos/msgs/foxglove_msgs/Color.py b/dimos/msgs/foxglove_msgs/Color.py new file mode 100644 index 0000000000..30362f837a --- /dev/null +++ b/dimos/msgs/foxglove_msgs/Color.py @@ -0,0 +1,50 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import hashlib +from dimos_lcm.foxglove_msgs import Color as LCMColor + + +class Color(LCMColor): + """Color with convenience methods.""" + + @classmethod + def from_string(cls, name: str, alpha: float = 0.2) -> Color: + """Generate a consistent color from a string using hash function. + + Args: + name: String to generate color from + alpha: Transparency value (0.0-1.0) + + Returns: + Color instance with deterministic RGB values + """ + # Hash the string to get consistent values + hash_obj = hashlib.md5(name.encode()) + hash_bytes = hash_obj.digest() + + # Use first 3 bytes for RGB (0-255) + r = hash_bytes[0] / 255.0 + g = hash_bytes[1] / 255.0 + b = hash_bytes[2] / 255.0 + + # Create and return color instance + color = cls() + color.r = r + color.g = g + color.b = b + color.a = alpha + return color diff --git a/dimos/msgs/geometry_msgs/Transform.py b/dimos/msgs/geometry_msgs/Transform.py index 468d009519..4db4c929a7 100644 --- a/dimos/msgs/geometry_msgs/Transform.py +++ b/dimos/msgs/geometry_msgs/Transform.py @@ -87,6 +87,9 @@ def lcm_transform(self) -> LCMTransformStamped: ), ) + def apply(self, other: "Transform") -> "Transform": + return self.__add__(other) + def __add__(self, other: "Transform") -> "Transform": """Compose two transforms (transform composition). 
@@ -248,7 +251,7 @@ def from_pose(cls, frame_id: str, pose: "Pose | PoseStamped") -> "Transform": else: raise TypeError(f"Expected Pose or PoseStamped, got {type(pose).__name__}") - def to_pose(self) -> "PoseStamped": + def to_pose(self, **kwargs) -> "PoseStamped": """Create a Transform from a Pose or PoseStamped. Args: @@ -262,9 +265,12 @@ def to_pose(self) -> "PoseStamped": # Handle both Pose and PoseStamped return PoseStamped( - position=self.translation, - orientation=self.rotation, - frame_id=self.frame_id, + **{ + "position": self.translation, + "orientation": self.rotation, + "frame_id": self.frame_id, + }, + **kwargs, ) def to_matrix(self) -> "np.ndarray": diff --git a/dimos/msgs/geometry_msgs/Vector3.py b/dimos/msgs/geometry_msgs/Vector3.py index 72e2fcef36..2eb204693b 100644 --- a/dimos/msgs/geometry_msgs/Vector3.py +++ b/dimos/msgs/geometry_msgs/Vector3.py @@ -155,6 +155,10 @@ def getArrow(): return f"{getArrow()} Vector {self.__repr__()}" + def agent_encode(self) -> dict: + """Encode the vector for agent communication.""" + return {"x": self.x, "y": self.y, "z": self.z} + def serialize(self) -> dict: """Serialize the vector to a tuple.""" return {"type": "vector", "c": (self.x, self.y, self.z)} diff --git a/dimos/msgs/sensor_msgs/Image.py b/dimos/msgs/sensor_msgs/Image.py index b79e07639f..30c74fd243 100644 --- a/dimos/msgs/sensor_msgs/Image.py +++ b/dimos/msgs/sensor_msgs/Image.py @@ -13,17 +13,15 @@ # limitations under the License. 
import base64 +import functools import time from dataclasses import dataclass, field -from datetime import timedelta from enum import Enum from typing import Literal, Optional, Tuple, TypedDict import cv2 import numpy as np import reactivex as rx - -# Import LCM types from dimos_lcm.sensor_msgs.Image import Image as LCMImage from dimos_lcm.std_msgs.Header import Header from reactivex import operators as ops @@ -31,6 +29,12 @@ from reactivex.scheduler import ThreadPoolScheduler from dimos.types.timestamped import Timestamped, TimestampedBufferCollection, to_human_readable +from dimos.utils.reactive import quality_barrier + +try: + from sensor_msgs.msg import Image as ROSImage +except ImportError: + ROSImage = None class ImageFormat(Enum): @@ -297,6 +301,7 @@ def crop(self, x: int, y: int, width: int, height: int) -> "Image": ts=self.ts, ) + @functools.cached_property def sharpness(self) -> float: """ Compute the Tenengrad focus measure for an image. @@ -497,6 +502,67 @@ def _parse_encoding(encoding: str) -> dict: return encoding_map[encoding] + @classmethod + def from_ros_msg(cls, ros_msg: ROSImage) -> "Image": + """Create an Image from a ROS sensor_msgs/Image message. 
+ + Args: + ros_msg: ROS Image message + + Returns: + Image instance + """ + # Convert timestamp from ROS header + ts = ros_msg.header.stamp.sec + (ros_msg.header.stamp.nanosec / 1_000_000_000) + + # Parse encoding to determine format and data type + format_info = cls._parse_encoding(ros_msg.encoding) + + # Convert data from ROS message (array.array) to numpy array + data_array = np.frombuffer(ros_msg.data, dtype=format_info["dtype"]) + + # Reshape to image dimensions + if format_info["channels"] == 1: + data_array = data_array.reshape((ros_msg.height, ros_msg.width)) + else: + data_array = data_array.reshape( + (ros_msg.height, ros_msg.width, format_info["channels"]) + ) + + # Crop to center 1/3 of the image (simulate 120-degree FOV from 360-degree) + original_width = data_array.shape[1] + crop_width = original_width // 3 + start_x = (original_width - crop_width) // 2 + end_x = start_x + crop_width + + # Crop the image horizontally to center 1/3 + if len(data_array.shape) == 2: + # Grayscale image + data_array = data_array[:, start_x:end_x] + else: + # Color image + data_array = data_array[:, start_x:end_x, :] + + # Fix color channel order: if ROS sends RGB but we expect BGR, swap channels + # ROS typically uses rgb8 encoding, but OpenCV/our system expects BGR + if format_info["format"] == ImageFormat.RGB: + # Convert RGB to BGR by swapping channels + if len(data_array.shape) == 3 and data_array.shape[2] == 3: + data_array = data_array[:, :, [2, 1, 0]] # RGB -> BGR + format_info["format"] = ImageFormat.BGR + elif format_info["format"] == ImageFormat.RGBA: + # Convert RGBA to BGRA by swapping channels + if len(data_array.shape) == 3 and data_array.shape[2] == 4: + data_array = data_array[:, :, [2, 1, 0, 3]] # RGBA -> BGRA + format_info["format"] = ImageFormat.BGRA + + return cls( + data=data_array, + format=format_info["format"], + frame_id=ros_msg.header.frame_id, + ts=ts, + ) + def __repr__(self) -> str: """String representation.""" return ( @@ -522,18 +588,8 @@ 
def __len__(self) -> int: def sharpness_window(target_frequency: float, source: Observable[Image]) -> Observable[Image]: - window = TimestampedBufferCollection(1.0 / target_frequency) - source.subscribe(window.add) - - thread_scheduler = ThreadPoolScheduler(max_workers=1) - - def find_best(*argv): - if not window._items: - return None + raise NotImplementedError("use sharpness_barrier instead") - found = max(window._items, key=lambda x: x.sharpness()) - return found - return rx.interval(1.0 / target_frequency).pipe( - ops.observe_on(thread_scheduler), ops.map(find_best), ops.filter(lambda x: x is not None) - ) +def sharpness_barrier(target_frequency: float): + return quality_barrier(lambda x: x.sharpness, target_frequency) diff --git a/dimos/msgs/sensor_msgs/Joy.py b/dimos/msgs/sensor_msgs/Joy.py new file mode 100644 index 0000000000..e528b304b6 --- /dev/null +++ b/dimos/msgs/sensor_msgs/Joy.py @@ -0,0 +1,181 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import time +from typing import List, TypeAlias + +from dimos_lcm.sensor_msgs import Joy as LCMJoy + +try: + from sensor_msgs.msg import Joy as ROSJoy +except ImportError: + ROSJoy = None + +from plum import dispatch + +from dimos.types.timestamped import Timestamped + +# Types that can be converted to/from Joy +JoyConvertable: TypeAlias = ( + tuple[List[float], List[int]] | dict[str, List[float] | List[int]] | LCMJoy +) + + +def sec_nsec(ts): + s = int(ts) + return [s, int((ts - s) * 1_000_000_000)] + + +class Joy(Timestamped): + msg_name = "sensor_msgs.Joy" + ts: float + frame_id: str + axes: List[float] + buttons: List[int] + + @dispatch + def __init__( + self, + ts: float = 0.0, + frame_id: str = "", + axes: List[float] | None = None, + buttons: List[int] | None = None, + ) -> None: + """Initialize a Joy message. + + Args: + ts: Timestamp in seconds + frame_id: Frame ID for the message + axes: List of axis values (typically -1.0 to 1.0) + buttons: List of button states (0 or 1) + """ + self.ts = ts if ts != 0 else time.time() + self.frame_id = frame_id + self.axes = axes if axes is not None else [] + self.buttons = buttons if buttons is not None else [] + + @dispatch + def __init__(self, joy_tuple: tuple[List[float], List[int]]) -> None: + """Initialize from a tuple of (axes, buttons).""" + self.ts = time.time() + self.frame_id = "" + self.axes = list(joy_tuple[0]) + self.buttons = list(joy_tuple[1]) + + @dispatch + def __init__(self, joy_dict: dict[str, List[float] | List[int]]) -> None: + """Initialize from a dictionary with 'axes' and 'buttons' keys.""" + self.ts = joy_dict.get("ts", time.time()) + self.frame_id = joy_dict.get("frame_id", "") + self.axes = list(joy_dict.get("axes", [])) + self.buttons = list(joy_dict.get("buttons", [])) + + @dispatch + def __init__(self, joy: Joy) -> None: + """Initialize from another Joy (copy constructor).""" + self.ts = joy.ts + self.frame_id = joy.frame_id + self.axes = 
list(joy.axes) + self.buttons = list(joy.buttons) + + @dispatch + def __init__(self, lcm_joy: LCMJoy) -> None: + """Initialize from an LCM Joy message.""" + self.ts = lcm_joy.header.stamp.sec + (lcm_joy.header.stamp.nsec / 1_000_000_000) + self.frame_id = lcm_joy.header.frame_id + self.axes = list(lcm_joy.axes) + self.buttons = list(lcm_joy.buttons) + + def lcm_encode(self) -> bytes: + lcm_msg = LCMJoy() + [lcm_msg.header.stamp.sec, lcm_msg.header.stamp.nsec] = sec_nsec(self.ts) + lcm_msg.header.frame_id = self.frame_id + lcm_msg.axes_length = len(self.axes) + lcm_msg.axes = self.axes + lcm_msg.buttons_length = len(self.buttons) + lcm_msg.buttons = self.buttons + return lcm_msg.lcm_encode() + + @classmethod + def lcm_decode(cls, data: bytes) -> Joy: + lcm_msg = LCMJoy.lcm_decode(data) + return cls( + ts=lcm_msg.header.stamp.sec + (lcm_msg.header.stamp.nsec / 1_000_000_000), + frame_id=lcm_msg.header.frame_id, + axes=list(lcm_msg.axes) if lcm_msg.axes else [], + buttons=list(lcm_msg.buttons) if lcm_msg.buttons else [], + ) + + def __str__(self) -> str: + return ( + f"Joy(axes={len(self.axes)} values, buttons={len(self.buttons)} values, " + f"frame_id='{self.frame_id}')" + ) + + def __repr__(self) -> str: + return ( + f"Joy(ts={self.ts}, frame_id='{self.frame_id}', " + f"axes={self.axes}, buttons={self.buttons})" + ) + + def __eq__(self, other) -> bool: + """Check if two Joy messages are equal.""" + if not isinstance(other, Joy): + return False + return ( + self.axes == other.axes + and self.buttons == other.buttons + and self.frame_id == other.frame_id + ) + + @classmethod + def from_ros_msg(cls, ros_msg: ROSJoy) -> "Joy": + """Create a Joy from a ROS sensor_msgs/Joy message. 
+ + Args: + ros_msg: ROS Joy message + + Returns: + Joy instance + """ + # Convert timestamp from ROS header + ts = ros_msg.header.stamp.sec + (ros_msg.header.stamp.nanosec / 1_000_000_000) + + return cls( + ts=ts, + frame_id=ros_msg.header.frame_id, + axes=list(ros_msg.axes), + buttons=list(ros_msg.buttons), + ) + + def to_ros_msg(self) -> ROSJoy: + """Convert to a ROS sensor_msgs/Joy message. + + Returns: + ROS Joy message + """ + ros_msg = ROSJoy() + + # Set header + ros_msg.header.frame_id = self.frame_id + ros_msg.header.stamp.sec = int(self.ts) + ros_msg.header.stamp.nanosec = int((self.ts - int(self.ts)) * 1_000_000_000) + + # Set axes and buttons + ros_msg.axes = self.axes + ros_msg.buttons = self.buttons + + return ros_msg diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index 8004032ceb..d81c8d0198 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -14,6 +14,7 @@ from __future__ import annotations +import functools import struct import time from typing import Optional @@ -28,6 +29,8 @@ from dimos_lcm.sensor_msgs.PointField import PointField from dimos_lcm.std_msgs.Header import Header +from dimos.msgs.geometry_msgs import Vector3 + # Import ROS types try: from sensor_msgs.msg import PointCloud2 as ROSPointCloud2 @@ -73,14 +76,83 @@ def from_numpy( pcd.points = o3d.utility.Vector3dVector(points) return cls(pointcloud=pcd, ts=timestamp, frame_id=frame_id) + def __str__(self) -> str: + return f"PointCloud2(frame_id='{self.frame_id}', num_points={len(self.pointcloud.points)})" + + @functools.cached_property + def center(self) -> Vector3: + """Calculate the center of the pointcloud in world frame.""" + center = np.asarray(self.pointcloud.points).mean(axis=0) + return Vector3(*center) + def points(self): return self.pointcloud.points + def __add__(self, other: PointCloud2) -> PointCloud2: + """Combine two PointCloud2 instances into one. 
+ + The resulting point cloud contains points from both inputs. + The frame_id and timestamp are taken from the first point cloud. + + Args: + other: Another PointCloud2 instance to combine with + + Returns: + New PointCloud2 instance containing combined points + """ + if not isinstance(other, PointCloud2): + raise ValueError("Can only add PointCloud2 to another PointCloud2") + + return PointCloud2( + pointcloud=self.pointcloud + other.pointcloud, + frame_id=self.frame_id, + ts=max(self.ts, other.ts), + ) + # TODO what's the usual storage here? is it already numpy? def as_numpy(self) -> np.ndarray: """Get points as numpy array.""" return np.asarray(self.pointcloud.points) + @functools.cache + def get_axis_aligned_bounding_box(self) -> o3d.geometry.AxisAlignedBoundingBox: + """Get axis-aligned bounding box of the point cloud.""" + return self.pointcloud.get_axis_aligned_bounding_box() + + @functools.cache + def get_oriented_bounding_box(self) -> o3d.geometry.OrientedBoundingBox: + """Get oriented bounding box of the point cloud.""" + return self.pointcloud.get_oriented_bounding_box() + + @functools.cache + def get_bounding_box_dimensions(self) -> tuple[float, float, float]: + """Get dimensions (width, height, depth) of axis-aligned bounding box.""" + bbox = self.get_axis_aligned_bounding_box() + extent = bbox.get_extent() + return tuple(extent) + + def bounding_box_intersects(self, other: "PointCloud2") -> bool: + # Get axis-aligned bounding boxes + bbox1 = self.get_axis_aligned_bounding_box() + bbox2 = other.get_axis_aligned_bounding_box() + + # Get min and max bounds + min1 = bbox1.get_min_bound() + max1 = bbox1.get_max_bound() + min2 = bbox2.get_min_bound() + max2 = bbox2.get_max_bound() + + # Check overlap in all three dimensions + # Boxes intersect if they overlap in ALL dimensions + return ( + min1[0] <= max2[0] + and max1[0] >= min2[0] + and min1[1] <= max2[1] + and max1[1] >= min2[1] + and min1[2] <= max2[2] + and max1[2] >= min2[2] + ) + def 
lcm_encode(self, frame_id: Optional[str] = None) -> bytes: """Convert to LCM PointCloud2 message.""" msg = LCMPointCloud2() diff --git a/dimos/msgs/sensor_msgs/__init__.py b/dimos/msgs/sensor_msgs/__init__.py index a7afafe2f2..9a8a7b54fe 100644 --- a/dimos/msgs/sensor_msgs/__init__.py +++ b/dimos/msgs/sensor_msgs/__init__.py @@ -1,3 +1,4 @@ from dimos.msgs.sensor_msgs.Image import Image, ImageFormat from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo +from dimos.msgs.sensor_msgs.Joy import Joy diff --git a/dimos/msgs/sensor_msgs/test_Joy.py b/dimos/msgs/sensor_msgs/test_Joy.py new file mode 100644 index 0000000000..fd11624b08 --- /dev/null +++ b/dimos/msgs/sensor_msgs/test_Joy.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +import time + +try: + from sensor_msgs.msg import Joy as ROSJoy + from std_msgs.msg import Header as ROSHeader + + ROS_AVAILABLE = True +except ImportError: + ROSJoy = None + ROSHeader = None + ROS_AVAILABLE = False + +from dimos.msgs.sensor_msgs.Joy import Joy + + +def test_lcm_encode_decode(): + """Test LCM encode/decode preserves Joy data.""" + print("Testing Joy LCM encode/decode...") + + # Create test joy message with sample gamepad data + original = Joy( + ts=1234567890.123456789, + frame_id="gamepad", + axes=[0.5, -0.25, 1.0, -1.0, 0.0, 0.75], # 6 axes (e.g., left/right sticks + triggers) + buttons=[1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0], # 12 buttons + ) + + # Encode to LCM bytes + encoded = original.lcm_encode() + assert isinstance(encoded, bytes) + assert len(encoded) > 0 + + # Decode back + decoded = Joy.lcm_decode(encoded) + + # Verify all fields match + assert abs(decoded.ts - original.ts) < 1e-9 + assert decoded.frame_id == original.frame_id + assert decoded.axes == original.axes + assert decoded.buttons == original.buttons + + print("✓ Joy LCM encode/decode test passed") + + +def test_initialization_methods(): + """Test various initialization methods for Joy.""" + print("Testing Joy initialization methods...") + + # Test default initialization + joy1 = Joy() + assert joy1.axes == [] + assert joy1.buttons == [] + assert joy1.frame_id == "" + assert joy1.ts > 0 # Should have current time + + # Test full initialization + joy2 = Joy(ts=1234567890.0, frame_id="xbox_controller", axes=[0.1, 0.2, 0.3], buttons=[1, 0, 1]) + assert joy2.ts == 1234567890.0 + assert joy2.frame_id == "xbox_controller" + assert joy2.axes == [0.1, 0.2, 0.3] + assert joy2.buttons == [1, 0, 1] + + # Test tuple initialization + joy3 = Joy(([0.5, -0.5], [1, 1, 0])) + assert joy3.axes == [0.5, -0.5] + assert joy3.buttons == [1, 1, 0] + + # Test dict initialization + joy4 = Joy({"axes": [0.7, 0.8], "buttons": [0, 1], "frame_id": "ps4_controller"}) + assert joy4.axes == 
[0.7, 0.8] + assert joy4.buttons == [0, 1] + assert joy4.frame_id == "ps4_controller" + + # Test copy constructor + joy5 = Joy(joy2) + assert joy5.ts == joy2.ts + assert joy5.frame_id == joy2.frame_id + assert joy5.axes == joy2.axes + assert joy5.buttons == joy2.buttons + assert joy5 is not joy2 # Different objects + + print("✓ Joy initialization methods test passed") + + +def test_equality(): + """Test Joy equality comparison.""" + print("Testing Joy equality...") + + joy1 = Joy(ts=1000.0, frame_id="controller1", axes=[0.5, -0.5], buttons=[1, 0, 1]) + + joy2 = Joy(ts=1000.0, frame_id="controller1", axes=[0.5, -0.5], buttons=[1, 0, 1]) + + joy3 = Joy( + ts=1000.0, + frame_id="controller2", # Different frame_id + axes=[0.5, -0.5], + buttons=[1, 0, 1], + ) + + joy4 = Joy( + ts=1000.0, + frame_id="controller1", + axes=[0.6, -0.5], # Different axes + buttons=[1, 0, 1], + ) + + # Same content should be equal + assert joy1 == joy2 + + # Different frame_id should not be equal + assert joy1 != joy3 + + # Different axes should not be equal + assert joy1 != joy4 + + # Different type should not be equal + assert joy1 != "not a joy" + assert joy1 != 42 + + print("✓ Joy equality test passed") + + +def test_string_representation(): + """Test Joy string representations.""" + print("Testing Joy string representations...") + + joy = Joy( + ts=1234567890.123, + frame_id="test_controller", + axes=[0.1, -0.2, 0.3, 0.4], + buttons=[1, 0, 1, 0, 0, 1], + ) + + # Test __str__ + str_repr = str(joy) + assert "Joy" in str_repr + assert "axes=4 values" in str_repr + assert "buttons=6 values" in str_repr + assert "test_controller" in str_repr + + # Test __repr__ + repr_str = repr(joy) + assert "Joy" in repr_str + assert "1234567890.123" in repr_str + assert "test_controller" in repr_str + assert "[0.1, -0.2, 0.3, 0.4]" in repr_str + assert "[1, 0, 1, 0, 0, 1]" in repr_str + + print("✓ Joy string representation test passed") + + +@pytest.mark.skipif(not ROS_AVAILABLE, reason="ROS not 
available") +def test_ros_conversion(): + """Test conversion to/from ROS Joy messages.""" + print("Testing Joy ROS conversion...") + + # Create a ROS Joy message + ros_msg = ROSJoy() + ros_msg.header = ROSHeader() + ros_msg.header.stamp.sec = 1234567890 + ros_msg.header.stamp.nanosec = 123456789 + ros_msg.header.frame_id = "ros_gamepad" + ros_msg.axes = [0.25, -0.75, 0.0, 1.0, -1.0] + ros_msg.buttons = [1, 1, 0, 0, 1, 0, 1, 0] + + # Convert from ROS + joy = Joy.from_ros_msg(ros_msg) + assert abs(joy.ts - 1234567890.123456789) < 1e-9 + assert joy.frame_id == "ros_gamepad" + assert joy.axes == [0.25, -0.75, 0.0, 1.0, -1.0] + assert joy.buttons == [1, 1, 0, 0, 1, 0, 1, 0] + + # Convert back to ROS + ros_msg2 = joy.to_ros_msg() + assert ros_msg2.header.frame_id == "ros_gamepad" + assert ros_msg2.header.stamp.sec == 1234567890 + assert abs(ros_msg2.header.stamp.nanosec - 123456789) < 100 # Allow small rounding + assert list(ros_msg2.axes) == [0.25, -0.75, 0.0, 1.0, -1.0] + assert list(ros_msg2.buttons) == [1, 1, 0, 0, 1, 0, 1, 0] + + print("✓ Joy ROS conversion test passed") + + +def test_edge_cases(): + """Test Joy with edge cases.""" + print("Testing Joy edge cases...") + + # Empty axes and buttons + joy1 = Joy(axes=[], buttons=[]) + assert joy1.axes == [] + assert joy1.buttons == [] + encoded = joy1.lcm_encode() + decoded = Joy.lcm_decode(encoded) + assert decoded.axes == [] + assert decoded.buttons == [] + + # Large number of axes and buttons + many_axes = [float(i) / 100.0 for i in range(20)] + many_buttons = [i % 2 for i in range(32)] + joy2 = Joy(axes=many_axes, buttons=many_buttons) + assert len(joy2.axes) == 20 + assert len(joy2.buttons) == 32 + encoded = joy2.lcm_encode() + decoded = Joy.lcm_decode(encoded) + # Check axes with floating point tolerance + assert len(decoded.axes) == len(many_axes) + for i, (a, b) in enumerate(zip(decoded.axes, many_axes)): + assert abs(a - b) < 1e-6, f"Axis {i}: {a} != {b}" + assert decoded.buttons == many_buttons + + # Extreme 
axis values + extreme_axes = [-1.0, 1.0, 0.0, -0.999999, 0.999999] + joy3 = Joy(axes=extreme_axes) + assert joy3.axes == extreme_axes + + print("✓ Joy edge cases test passed") + + +if __name__ == "__main__": + test_lcm_encode_decode() + test_initialization_methods() + test_equality() + test_string_representation() + if ROS_AVAILABLE: + test_ros_conversion() + test_edge_cases() + print("\nAll Joy tests passed! ✓") diff --git a/dimos/msgs/sensor_msgs/test_PointCloud2.py b/dimos/msgs/sensor_msgs/test_PointCloud2.py index ac115462be..cb18d6fd9d 100644 --- a/dimos/msgs/sensor_msgs/test_PointCloud2.py +++ b/dimos/msgs/sensor_msgs/test_PointCloud2.py @@ -234,6 +234,80 @@ def test_ros_conversion(): print("\n✓ All ROS conversion tests passed!") +def test_bounding_box_intersects(): + """Test bounding_box_intersects method with various scenarios.""" + # Test 1: Overlapping boxes + pc1 = PointCloud2.from_numpy(np.array([[0, 0, 0], [2, 2, 2]])) + pc2 = PointCloud2.from_numpy(np.array([[1, 1, 1], [3, 3, 3]])) + assert pc1.bounding_box_intersects(pc2) == True + assert pc2.bounding_box_intersects(pc1) == True # Should be symmetric + + # Test 2: Non-overlapping boxes + pc3 = PointCloud2.from_numpy(np.array([[0, 0, 0], [1, 1, 1]])) + pc4 = PointCloud2.from_numpy(np.array([[2, 2, 2], [3, 3, 3]])) + assert pc3.bounding_box_intersects(pc4) == False + assert pc4.bounding_box_intersects(pc3) == False + + # Test 3: Touching boxes (edge case - should be True) + pc5 = PointCloud2.from_numpy(np.array([[0, 0, 0], [1, 1, 1]])) + pc6 = PointCloud2.from_numpy(np.array([[1, 1, 1], [2, 2, 2]])) + assert pc5.bounding_box_intersects(pc6) == True + assert pc6.bounding_box_intersects(pc5) == True + + # Test 4: One box completely inside another + pc7 = PointCloud2.from_numpy(np.array([[0, 0, 0], [3, 3, 3]])) + pc8 = PointCloud2.from_numpy(np.array([[1, 1, 1], [2, 2, 2]])) + assert pc7.bounding_box_intersects(pc8) == True + assert pc8.bounding_box_intersects(pc7) == True + + # Test 5: Boxes overlapping 
only in 2 dimensions (not all 3) + pc9 = PointCloud2.from_numpy(np.array([[0, 0, 0], [2, 2, 1]])) + pc10 = PointCloud2.from_numpy(np.array([[1, 1, 2], [3, 3, 3]])) + assert pc9.bounding_box_intersects(pc10) == False + assert pc10.bounding_box_intersects(pc9) == False + + # Test 6: Real-world detection scenario with floating point coordinates + detection1_points = np.array( + [[-3.5, -0.3, 0.1], [-3.3, -0.2, 0.1], [-3.5, -0.3, 0.3], [-3.3, -0.2, 0.3]] + ) + pc_det1 = PointCloud2.from_numpy(detection1_points) + + detection2_points = np.array( + [[-3.4, -0.25, 0.15], [-3.2, -0.15, 0.15], [-3.4, -0.25, 0.35], [-3.2, -0.15, 0.35]] + ) + pc_det2 = PointCloud2.from_numpy(detection2_points) + + assert pc_det1.bounding_box_intersects(pc_det2) == True + + # Test 7: Single point clouds + pc_single1 = PointCloud2.from_numpy(np.array([[1.0, 1.0, 1.0]])) + pc_single2 = PointCloud2.from_numpy(np.array([[1.0, 1.0, 1.0]])) + pc_single3 = PointCloud2.from_numpy(np.array([[2.0, 2.0, 2.0]])) + + # Same point should intersect + assert pc_single1.bounding_box_intersects(pc_single2) == True + # Different points should not intersect + assert pc_single1.bounding_box_intersects(pc_single3) == False + + # Test 8: Empty point clouds + pc_empty1 = PointCloud2.from_numpy(np.array([]).reshape(0, 3)) + pc_empty2 = PointCloud2.from_numpy(np.array([]).reshape(0, 3)) + pc_nonempty = PointCloud2.from_numpy(np.array([[1.0, 1.0, 1.0]])) + + # Empty clouds should handle gracefully (Open3D returns inf bounds) + # This might raise an exception or return False - we should handle gracefully + try: + result = pc_empty1.bounding_box_intersects(pc_empty2) + # If no exception, verify behavior is consistent + assert isinstance(result, bool) + except: + # If it raises an exception, that's also acceptable for empty clouds + pass + + print("✓ All bounding box intersection tests passed!") + + if __name__ == "__main__": test_lcm_encode_decode() test_ros_conversion() + test_bounding_box_intersects() diff --git 
a/dimos/msgs/sensor_msgs/test_image.py b/dimos/msgs/sensor_msgs/test_image.py index 7a3c84a8b6..0af559904a 100644 --- a/dimos/msgs/sensor_msgs/test_image.py +++ b/dimos/msgs/sensor_msgs/test_image.py @@ -14,8 +14,9 @@ import numpy as np import pytest +from reactivex import operators as ops -from dimos.msgs.sensor_msgs.Image import Image, ImageFormat, sharpness_window +from dimos.msgs.sensor_msgs.Image import Image, ImageFormat, sharpness_barrier, sharpness_window from dimos.utils.data import get_data from dimos.utils.testing import TimedSensorReplay @@ -65,7 +66,7 @@ def test_opencv_conversion(img: Image): @pytest.mark.tool -def test_sharpness_detector(): +def test_sharpness_stream(): get_data("unitree_office_walk") # Preload data for testing video_store = TimedSensorReplay( "unitree_office_walk/video", autocast=lambda x: Image.from_numpy(x).to_rgb() @@ -74,55 +75,85 @@ def test_sharpness_detector(): cnt = 0 for image in video_store.iterate(): cnt = cnt + 1 - print(image.sharpness()) + print(image.sharpness) if cnt > 30: return -@pytest.mark.tool -def test_sharpness_sliding_window_foxglove(): +def test_sharpness_barrier(): import time + from unittest.mock import MagicMock + + # Create mock images with known sharpness values + # This avoids loading real data from disk + mock_images = [] + sharpness_values = [0.3711, 0.3241, 0.3067, 0.2583, 0.3665] # Just 5 images for 1 window + + for i, sharp in enumerate(sharpness_values): + img = MagicMock() + img.sharpness = sharp + img.ts = 1758912038.208 + i * 0.01 # Simulate timestamps + mock_images.append(img) + + # Track what goes into windows and what comes out + start_wall_time = None + window_contents = [] # List of (wall_time, image) + emitted_images = [] + + def track_input(img): + """Track all images going into sharpness_barrier with wall-clock time""" + nonlocal start_wall_time + wall_time = time.time() + if start_wall_time is None: + start_wall_time = wall_time + relative_time = wall_time - start_wall_time + 
window_contents.append((relative_time, img)) + return img + + def track_output(img): + """Track what sharpness_barrier emits""" + emitted_images.append(img) + + # Use 20Hz frequency (0.05s windows) for faster test + # Emit images at 100Hz to get ~5 per window + from reactivex import from_iterable, interval + + source = from_iterable(mock_images).pipe( + ops.zip(interval(0.01)), # 100Hz emission rate + ops.map(lambda x: x[0]), # Extract just the image + ) - from dimos.msgs.geometry_msgs import Vector3 - from dimos.protocol.pubsub.lcmpubsub import LCM, Topic - - lcm = LCM() - lcm.start() - - ping = 0 - sharp_topic = Topic("/sharp", Image) - all_topic = Topic("/all", Image) - sharpness_topic = Topic("/sharpness", Vector3) - - get_data("unitree_office_walk") # Preload data for testing - video_stream = TimedSensorReplay( - "unitree_office_walk/video", autocast=lambda x: Image.from_numpy(x).to_rgb() - ).stream() + source.pipe( + ops.do_action(track_input), # Track inputs + sharpness_barrier(20), # 20Hz = 0.05s windows + ops.do_action(track_output), # Track outputs + ).run() - # Publish all images to all_topic - video_stream.subscribe(lambda x: lcm.publish(all_topic, x)) + # Only need 0.08s for 1 full window at 20Hz plus buffer + time.sleep(0.08) - def sharpness_vector(x: Image): - nonlocal ping - sharpness = x.sharpness() - if ping: - y = 1 - ping = ping - 1 - else: - y = 0 + # Verify we got correct emissions + assert len(emitted_images) >= 1, f"Expected at least 1 emission, got {len(emitted_images)}" - return Vector3([sharpness, y, 0]) + # Group inputs by wall-clock windows and verify we got the sharpest + window_duration = 0.05 # 20Hz - video_stream.subscribe(lambda x: lcm.publish(sharpness_topic, sharpness_vector(x))) + # Test just the first window + for window_idx in range(min(1, len(emitted_images))): + window_start = window_idx * window_duration + window_end = window_start + window_duration - def pub_sharp(x: Image): - nonlocal ping - ping = 3 - 
lcm.publish(sharp_topic, x) + # Get all images that arrived during this wall-clock window + window_imgs = [ + img for wall_time, img in window_contents if window_start <= wall_time < window_end + ] - sharpness_window( - 1, - source=video_stream, - ).subscribe(pub_sharp) + if window_imgs: + max_sharp = max(img.sharpness for img in window_imgs) + emitted_sharp = emitted_images[window_idx].sharpness - time.sleep(120) + # Verify we emitted the sharpest + assert abs(emitted_sharp - max_sharp) < 0.0001, ( + f"Window {window_idx}: Emitted image (sharp={emitted_sharp}) " + f"is not the sharpest (max={max_sharp})" + ) diff --git a/dimos/msgs/std_msgs/Bool.py b/dimos/msgs/std_msgs/Bool.py new file mode 100644 index 0000000000..6af250277e --- /dev/null +++ b/dimos/msgs/std_msgs/Bool.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Bool message type.""" + +from typing import ClassVar + +from dimos_lcm.std_msgs import Bool as LCMBool + +try: + from std_msgs.msg import Bool as ROSBool +except ImportError: + ROSBool = None + + +class Bool(LCMBool): + """ROS-compatible Bool message.""" + + msg_name = "std_msgs.Bool" + + def __init__(self, data: bool = False): + """Initialize Bool with data value.""" + self.data = data + + @classmethod + def from_ros_msg(cls, ros_msg: ROSBool) -> "Bool": + """Create a Bool from a ROS std_msgs/Bool message. 
+
+        Args:
+            ros_msg: ROS Bool message
+
+        Returns:
+            Bool instance
+        """
+        return cls(data=ros_msg.data)
+
+    def to_ros_msg(self) -> ROSBool:
+        """Convert to a ROS std_msgs/Bool message.
+
+        Returns:
+            ROS Bool message
+        """
+        if ROSBool is None:
+            raise ImportError("ROS std_msgs not available")
+        ros_msg = ROSBool()
+        ros_msg.data = bool(self.data)
+        return ros_msg
diff --git a/dimos/msgs/std_msgs/Int8.py b/dimos/msgs/std_msgs/Int8.py
new file mode 100644
index 0000000000..f87a96a9ad
--- /dev/null
+++ b/dimos/msgs/std_msgs/Int8.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# Copyright 2025 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Copyright 2025 Dimensional Inc.
+
+"""Int8 message type."""
+
+from typing import ClassVar
+from dimos_lcm.std_msgs import Int8 as LCMInt8
+from std_msgs.msg import Int8 as ROSInt8
+
+
+class Int8(LCMInt8):
+    """ROS-compatible Int8 message."""
+
+    msg_name: ClassVar[str] = "std_msgs.Int8"
+
+    def __init__(self, data: int = 0):
+        """Initialize Int8 with data value."""
+        self.data = data
+
+    @classmethod
+    def from_ros_msg(cls, ros_msg: ROSInt8) -> "Int8":
+        """Create an Int8 from a ROS std_msgs/Int8 message.
+
+        Args:
+            ros_msg: ROS Int8 message
+
+        Returns:
+            Int8 instance
+        """
+        return cls(data=ros_msg.data)
+
+    def to_ros_msg(self) -> ROSInt8:
+        """Convert to a ROS std_msgs/Int8 message.
+ + Returns: + ROS Int8 message + """ + ros_msg = ROSInt8() + ros_msg.data = self.data + return ros_msg diff --git a/dimos/msgs/std_msgs/__init__.py b/dimos/msgs/std_msgs/__init__.py index 3ed93fa77d..d46e2ce9a3 100644 --- a/dimos/msgs/std_msgs/__init__.py +++ b/dimos/msgs/std_msgs/__init__.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .Bool import Bool from .Header import Header from .Int32 import Int32 +from .Int8 import Int8 -__all__ = ["Header", "Int32"] +__all__ = ["Bool", "Header", "Int32", "Int8"] diff --git a/dimos/perception/detection2d/conftest.py b/dimos/perception/detection2d/conftest.py index 93d771b373..b0df16c7ec 100644 --- a/dimos/perception/detection2d/conftest.py +++ b/dimos/perception/detection2d/conftest.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import functools from typing import Optional, TypedDict import pytest from dimos_lcm.foxglove_msgs.ImageAnnotations import ImageAnnotations +from dimos_lcm.foxglove_msgs.SceneUpdate import SceneUpdate from dimos_lcm.sensor_msgs import CameraInfo, PointCloud2 +from dimos_lcm.visualization_msgs.MarkerArray import MarkerArray from dimos.core import start from dimos.core.transport import LCMTransport @@ -25,7 +26,7 @@ from dimos.msgs.sensor_msgs.Image import Image from dimos.perception.detection2d.module2D import Detection2DModule from dimos.perception.detection2d.module3D import Detection3DModule -from dimos.perception.detection2d.type import ImageDetections3D +from dimos.perception.detection2d.type import ImageDetections2D, ImageDetections3D from dimos.protocol.service import lcmservice as lcm from dimos.protocol.tf import TF from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule @@ -35,108 +36,8 @@ from dimos.utils.testing import TimedSensorReplay -class Moment(TypedDict, total=False): - odom_frame: Odometry - 
lidar_frame: LidarMessage - image_frame: Image - camera_info: CameraInfo - transforms: list[Transform] - tf: TF - annotations: Optional[ImageAnnotations] - detections: Optional[ImageDetections3D] - - @pytest.fixture def dimos_cluster(): dimos = start(5) yield dimos dimos.stop() - - -@pytest.fixture(scope="session") -def moment(): - data_dir = "unitree_go2_lidar_corrected" - get_data(data_dir) - - seek = 10 - - lidar_frame = TimedSensorReplay(f"{data_dir}/lidar").find_closest_seek(seek) - - image_frame = TimedSensorReplay( - f"{data_dir}/video", - ).find_closest(lidar_frame.ts) - - image_frame.frame_id = "camera_optical" - - odom_frame = TimedSensorReplay(f"{data_dir}/odom", autocast=Odometry.from_msg).find_closest( - lidar_frame.ts - ) - - transforms = ConnectionModule._odom_to_tf(odom_frame) - - tf = TF() - tf.publish(*transforms) - - return { - "odom_frame": odom_frame, - "lidar_frame": lidar_frame, - "image_frame": image_frame, - "camera_info": ConnectionModule._camera_info(), - "transforms": transforms, - "tf": tf, - } - - -@pytest.fixture(scope="session") -def publish_lcm(): - def publish(moment: Moment): - lcm.autoconf() - - lidar_frame_transport: LCMTransport = LCMTransport("/lidar", LidarMessage) - lidar_frame_transport.publish(moment.get("lidar_frame")) - - image_frame_transport: LCMTransport = LCMTransport("/image", Image) - image_frame_transport.publish(moment.get("image_frame")) - - odom_frame_transport: LCMTransport = LCMTransport("/odom", Odometry) - odom_frame_transport.publish(moment.get("odom_frame")) - - camera_info_transport: LCMTransport = LCMTransport("/camera_info", CameraInfo) - camera_info_transport.publish(moment.get("camera_info")) - - annotations = moment.get("annotations") - if annotations: - annotations_transport: LCMTransport = LCMTransport("/annotations", ImageAnnotations) - annotations_transport.publish(annotations) - - detections = moment.get("detections") - if detections: - for i, detection in enumerate(detections): - 
detections_transport: LCMTransport = LCMTransport( - f"/detected/pointcloud/{i}", PointCloud2 - ) - detections_transport.publish(detection.pointcloud) - - detections_image_transport: LCMTransport = LCMTransport( - f"/detected/image/{i}", Image - ) - detections_image_transport.publish(detection.cropped_image()) - - return publish - - -@pytest.fixture(scope="session") -def detections2d(moment: Moment): - return Detection2DModule().process_image_frame(moment["image_frame"]) - - -@pytest.fixture(scope="session") -def detections3d(moment: Moment): - detections2d = Detection2DModule().process_image_frame(moment["image_frame"]) - camera_transform = moment["tf"].get("camera_optical", "world") - if camera_transform is None: - raise ValueError("No camera_optical transform in tf") - - return Detection3DModule(camera_info=moment["camera_info"]).process_frame( - detections2d, moment["lidar_frame"], camera_transform - ) diff --git a/dimos/perception/detection2d/connectionModule.py b/dimos/perception/detection2d/connectionModule.py new file mode 100644 index 0000000000..1cafc3ffc9 --- /dev/null +++ b/dimos/perception/detection2d/connectionModule.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/dimos/perception/detection2d/module2D.py b/dimos/perception/detection2d/module2D.py index 92d1c0612b..82fb181be9 100644 --- a/dimos/perception/detection2d/module2D.py +++ b/dimos/perception/detection2d/module2D.py @@ -12,55 +12,158 @@ # See the License for the specific language governing permissions and # limitations under the License. import functools -from typing import Any, Callable, List, Optional +import json +import time +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Callable, Optional +import numpy as np from dimos_lcm.foxglove_msgs.ImageAnnotations import ( ImageAnnotations, ) from reactivex import operators as ops from reactivex.observable import Observable +from reactivex.subject import Subject from dimos.core import In, Module, Out, rpc +from dimos.models.vl import QwenVlModel, VlModel from dimos.msgs.sensor_msgs import Image +from dimos.msgs.sensor_msgs.Image import sharpness_barrier from dimos.msgs.vision_msgs import Detection2DArray -from dimos.perception.detection2d.type import Detection2D, ImageDetections2D +from dimos.perception.detection2d.type import ( + Detection2D, + ImageDetections2D, + InconvinientDetectionFormat, +) from dimos.perception.detection2d.yolo_2d_det import Yolo2DDetector from dimos.utils.reactive import backpressure +class Detector(ABC): + @abstractmethod + def process_image(self, image: np.ndarray) -> InconvinientDetectionFormat: ... 
+ + +@dataclass +class Config: + detector: Optional[Callable[[Any], Detector]] = Yolo2DDetector + max_freq: float = 0.5 # hz + vlmodel: VlModel = QwenVlModel + + class Detection2DModule(Module): + config: Config + detector: Detector + image: In[Image] = None # type: ignore detections: Out[Detection2DArray] = None # type: ignore annotations: Out[ImageAnnotations] = None # type: ignore - _initDetector = Yolo2DDetector + detected_image_0: Out[Image] = None # type: ignore + detected_image_1: Out[Image] = None # type: ignore + detected_image_2: Out[Image] = None # type: ignore + + detected_image_0: Out[Image] = None # type: ignore + detected_image_1: Out[Image] = None # type: ignore + detected_image_2: Out[Image] = None # type: ignore - def __init__(self, *args, detector=Optional[Callable[[Any], Any]], **kwargs): + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - if detector: - self._detectorClass = detector - self.detector = self._initDetector() + self.config: Config = Config(**kwargs) + self.detector = self.config.detector() + self.vlmodel = self.config.vlmodel() + self.vlm_detections_subject = Subject() + + def vlm_query(self, query: str) -> ImageDetections2D: + image = self.sharp_image_stream().pipe(ops.take(1)).run() + + full_query = f"""show me a bounding boxes in pixels for this query: `{query}` + + format should be: + `[ + [label, x1, y1, x2, y2] + ... + ]` + + (etc, multiple matches are possible) + + If there's no match return `[]`. 
Label is whatever you think is appropriate + + Only respond with the coordinates, no other text.""" + + response = self.vlmodel.query(image, full_query) + coords = json.loads(response) + + imageDetections = ImageDetections2D(image) + + for track_id, detection_list in enumerate(coords): + if len(detection_list) != 5: + continue + name = detection_list[0] + bbox = list(map(float, detection_list[1:])) + imageDetections.detections.append( + Detection2D( + bbox=bbox, + track_id=track_id, + class_id=-100, + confidence=1.0, + name=name, + ts=time.time(), + image=image, + ) + ) + + print("vlm detected", imageDetections) + # Emit the VLM detections to the subject + self.vlm_detections_subject.on_next(imageDetections) + + return imageDetections def process_image_frame(self, image: Image) -> ImageDetections2D: - detections = ImageDetections2D.from_detector( + print("Processing image frame for detections", image) + return ImageDetections2D.from_detector( image, self.detector.process_image(image.to_opencv()) ) - return detections @functools.cache - def detection_stream(self) -> Observable[ImageDetections2D]: - return backpressure(self.image.observable().pipe(ops.map(self.process_image_frame))) + def sharp_image_stream(self) -> Observable[Image]: + return backpressure( + self.image.observable().pipe( + sharpness_barrier(self.config.max_freq), + ) + ) + + @functools.cache + def detection_stream_2d(self) -> Observable[ImageDetections2D]: + # self.vlm_detections_subject + # Regular detection stream from the detector + regular_detections = self.sharp_image_stream().pipe(ops.map(self.process_image_frame)) + # Merge with VL model detections + return backpressure(regular_detections.pipe(ops.merge(self.vlm_detections_subject))) @rpc def start(self): - self.detection_stream().subscribe( - lambda det: self.detections.publish(det.to_ros_detection2d_array()) - ) + # self.detection_stream_2d().subscribe( + # lambda det: self.detections.publish(det.to_ros_detection2d_array()) + # ) - 
self.detection_stream().subscribe( + def publish_cropped_images(detections: ImageDetections2D): + for index, detection in enumerate(detections[:3]): + image_topic = getattr(self, "detected_image_" + str(index)) + image_topic.publish(detection.cropped_image()) + + self.detection_stream_2d().subscribe( lambda det: self.annotations.publish(det.to_foxglove_annotations()) ) + def publish_cropped(detections: ImageDetections2D): + for index, detection in enumerate(detections[:3]): + image_topic = getattr(self, "detected_image_" + str(index)) + image_topic.publish(detection.cropped_image()) + + self.detection_stream_2d().subscribe(publish_cropped) + @rpc def stop(self): ... diff --git a/dimos/perception/detection2d/module3D.py b/dimos/perception/detection2d/module3D.py index 98a1dc14af..15e671e171 100644 --- a/dimos/perception/detection2d/module3D.py +++ b/dimos/perception/detection2d/module3D.py @@ -12,32 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import time -from typing import List, Optional, Tuple -import numpy as np -from dimos_lcm.foxglove_msgs.ImageAnnotations import ( - ImageAnnotations, -) from dimos_lcm.sensor_msgs import CameraInfo from reactivex import operators as ops +from reactivex.observable import Observable from dimos.core import In, Out, rpc from dimos.msgs.geometry_msgs import Transform from dimos.msgs.sensor_msgs import Image, PointCloud2 -from dimos.msgs.vision_msgs import Detection2DArray from dimos.perception.detection2d.module2D import Detection2DModule - -# from dimos.perception.detection2d.detic import Detic2DDetector from dimos.perception.detection2d.type import ( - Detection2D, ImageDetections2D, ImageDetections3D, ) - -# Type aliases for clarity -ImageDetections = Tuple[Image, List[Detection2D]] -ImageDetection = Tuple[Image, Detection2D] +from dimos.perception.detection2d.type.detection3d import Detection3D +from dimos.types.timestamped import align_timestamped +from dimos.utils.reactive import backpressure class Detection3DModule(Detection2DModule): @@ -45,164 +35,20 @@ class Detection3DModule(Detection2DModule): image: In[Image] = None # type: ignore pointcloud: In[PointCloud2] = None # type: ignore - # type: ignore - detections: Out[Detection2DArray] = None # type: ignore - annotations: Out[ImageAnnotations] = None # type: ignore detected_pointcloud_0: Out[PointCloud2] = None # type: ignore detected_pointcloud_1: Out[PointCloud2] = None # type: ignore detected_pointcloud_2: Out[PointCloud2] = None # type: ignore - detected_image_0: Out[Image] = None # type: ignore - detected_image_1: Out[Image] = None # type: ignore - detected_image_2: Out[Image] = None # type: ignore + + detection_3d_stream: Observable[ImageDetections3D] = None def __init__(self, camera_info: CameraInfo, *args, **kwargs): - self.camera_info = camera_info super().__init__(*args, **kwargs) - - def detect(self, image: Image) -> ImageDetections: - detections = Detection2D.from_detector( - 
self.detector.process_image(image.to_opencv()), ts=image.ts - ) - return (image, detections) - - def project_points_to_camera( - self, - points_3d: np.ndarray, - camera_matrix: np.ndarray, - extrinsics: Transform, - ) -> Tuple[np.ndarray, np.ndarray]: - """Project 3D points to 2D camera coordinates.""" - # Transform points from world to camera_optical frame - points_homogeneous = np.hstack([points_3d, np.ones((points_3d.shape[0], 1))]) - extrinsics_matrix = extrinsics.to_matrix() - points_camera = (extrinsics_matrix @ points_homogeneous.T).T - - # Filter out points behind the camera - valid_mask = points_camera[:, 2] > 0 - points_camera = points_camera[valid_mask] - - # Project to 2D - points_2d_homogeneous = (camera_matrix @ points_camera[:, :3].T).T - points_2d = points_2d_homogeneous[:, :2] / points_2d_homogeneous[:, 2:3] - - return points_2d, valid_mask - - def filter_points_in_detections( - self, - detections: ImageDetections2D, - pointcloud: PointCloud2, - world_to_camera_transform: Transform, - ) -> List[Optional[PointCloud2]]: - """Filter lidar points that fall within detection bounding boxes.""" - # Extract camera parameters - camera_info = self.camera_info - fx, fy, cx = camera_info.K[0], camera_info.K[4], camera_info.K[2] - cy = camera_info.K[5] - image_width = camera_info.width - image_height = camera_info.height - - camera_matrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) - - # Convert pointcloud to numpy array - lidar_points = pointcloud.as_numpy() - - # Project all points to camera frame - points_2d_all, valid_mask = self.project_points_to_camera( - lidar_points, camera_matrix, world_to_camera_transform - ) - valid_3d_points = lidar_points[valid_mask] - points_2d = points_2d_all.copy() - - # Filter points within image bounds - in_image_mask = ( - (points_2d[:, 0] >= 0) - & (points_2d[:, 0] < image_width) - & (points_2d[:, 1] >= 0) - & (points_2d[:, 1] < image_height) - ) - points_2d = points_2d[in_image_mask] - valid_3d_points = 
valid_3d_points[in_image_mask] - - filtered_pointclouds: List[Optional[PointCloud2]] = [] - - for detection in detections: - # Extract bbox from Detection2D object - bbox = detection.bbox - x_min, y_min, x_max, y_max = bbox - - # Find points within this detection box (with small margin) - margin = 5 # pixels - in_box_mask = ( - (points_2d[:, 0] >= x_min - margin) - & (points_2d[:, 0] <= x_max + margin) - & (points_2d[:, 1] >= y_min - margin) - & (points_2d[:, 1] <= y_max + margin) - ) - - detection_points = valid_3d_points[in_box_mask] - - # Create PointCloud2 message for this detection - if detection_points.shape[0] > 0: - detection_pointcloud = PointCloud2.from_numpy( - detection_points, - frame_id=pointcloud.frame_id, - timestamp=pointcloud.ts, - ) - filtered_pointclouds.append(detection_pointcloud) - else: - filtered_pointclouds.append(None) - - return filtered_pointclouds - - def combine_pointclouds(self, pointcloud_list: List[PointCloud2]) -> PointCloud2: - """Combine multiple pointclouds into a single one.""" - # Filter out None values - valid_pointclouds = [pc for pc in pointcloud_list if pc is not None] - - if not valid_pointclouds: - # Return empty pointcloud if no valid pointclouds - return PointCloud2.from_numpy( - np.array([]).reshape(0, 3), frame_id="world", timestamp=time.time() - ) - - # Combine all point arrays - all_points = np.vstack([pc.as_numpy() for pc in valid_pointclouds]) - - # Use frame_id and timestamp from first pointcloud - combined_pointcloud = PointCloud2.from_numpy( - all_points, - frame_id=valid_pointclouds[0].frame_id, - timestamp=valid_pointclouds[0].ts, - ) - - return combined_pointcloud - - def hidden_point_removal( - self, camera_transform: Transform, pc: PointCloud2, radius: float = 100.0 - ): - camera_position = camera_transform.inverse().translation - camera_pos_np = camera_position.to_numpy().reshape(3, 1) - - pcd = pc.pointcloud - try: - _, visible_indices = pcd.hidden_point_removal(camera_pos_np, radius) - visible_pcd = 
pcd.select_by_index(visible_indices) - - return PointCloud2(visible_pcd, frame_id=pc.frame_id, ts=pc.ts) - except Exception as e: - return pc - - def cleanup_pointcloud(self, pc: PointCloud2) -> PointCloud2: - height = pc.filter_by_height(-0.05) - statistical, _ = height.pointcloud.remove_statistical_outlier( - nb_neighbors=20, std_ratio=2.0 - ) - return PointCloud2(statistical, pc.frame_id, pc.ts) + self.camera_info = camera_info + Detection2DModule.__init__(self, *args, **kwargs) def process_frame( self, - # these have to be timestamp aligned detections: ImageDetections2D, pointcloud: PointCloud2, transform: Transform, @@ -210,57 +56,48 @@ def process_frame( if not transform: return ImageDetections3D(detections.image, []) - pointcloud_list = self.filter_points_in_detections(detections, pointcloud, transform) - + print("3d projection", detections, pointcloud, transform) detection3d_list = [] - for detection, pc in zip(detections, pointcloud_list): - if pc is None: - continue - pc = self.hidden_point_removal(transform, self.cleanup_pointcloud(pc)) - if pc is None: - continue - - detection3d_list.append(detection.to_3d(pointcloud=pc, transform=transform)) + for detection in detections: + detection3d = Detection3D.from_2d( + detection, + world_pointcloud=pointcloud, + camera_info=self.camera_info, + world_to_optical_transform=transform, + ) + if detection3d is not None: + detection3d_list.append(detection3d) - return ImageDetections3D(detections.image, detection3d_list) + ret = ImageDetections3D(detections.image, detection3d_list) + print("3d projection finished", ret) + return ret @rpc def start(self): - time_tolerance = 5.0 # seconds + super().start() def detection2d_to_3d(args): detections, pc = args - transform = self.tf.get("camera_optical", "world", detections.image.ts, time_tolerance) + transform = self.tf.get("camera_optical", pc.frame_id, detections.image.ts, 5.0) return self.process_frame(detections, pc, transform) - combined_stream = 
self.detection_stream().pipe( - ops.with_latest_from(self.pointcloud.observable()), ops.map(detection2d_to_3d) - ) - - self.detection_stream().subscribe( - lambda det: self.detections.publish(det.to_ros_detection2d_array()) - ) + self.detection_stream_3d = align_timestamped( + backpressure(self.detection_stream_2d()), + self.pointcloud.observable(), + match_tolerance=0.25, + buffer_size=20.0, + ).pipe(ops.map(detection2d_to_3d)) - self.detection_stream().subscribe( - lambda det: self.annotations.publish(det.to_image_annotations()) - ) + # self.detection_stream_3d = backpressure(self.detection_stream_2d()).pipe( + # ops.with_latest_from(self.pointcloud.observable()), ops.map(detection2d_to_3d) + # ) - combined_stream.subscribe(self._handle_combined_detections) + self.detection_stream_3d.subscribe(self._publish_detections) - def _handle_combined_detections(self, detections: ImageDetections3D): + def _publish_detections(self, detections: ImageDetections3D): if not detections: return - print(detections) - - if len(detections) > 0: - self.detected_pointcloud_0.publish(detections[0].pointcloud) - self.detected_image_0.publish(detections[0].cropped_image()) - - if len(detections) > 1: - self.detected_pointcloud_1.publish(detections[1].pointcloud) - self.detected_image_1.publish(detections[1].cropped_image()) - - if len(detections) > 3: - self.detected_pointcloud_2.publish(detections[2].pointcloud) - self.detected_image_2.publish(detections[2].cropped_image()) + for index, detection in enumerate(detections[:3]): + pointcloud_topic = getattr(self, "detected_pointcloud_" + str(index)) + pointcloud_topic.publish(detection.pointcloud) diff --git a/dimos/perception/detection2d/moduleDB.py b/dimos/perception/detection2d/moduleDB.py index 8e9205c4f3..4b1dd7d588 100644 --- a/dimos/perception/detection2d/moduleDB.py +++ b/dimos/perception/detection2d/moduleDB.py @@ -11,46 +11,362 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -import functools +import threading import time -from typing import List, Optional, Tuple +from copy import copy +from typing import Any, Callable, Dict, List, Optional -import numpy as np -from dimos_lcm.foxglove_msgs.ImageAnnotations import ( - ImageAnnotations, -) -from dimos_lcm.sensor_msgs import CameraInfo -from dimos_lcm.vision_msgs import Detection2D as ROSDetection2D -from reactivex import operators as ops +from dimos_lcm.foxglove_msgs.ImageAnnotations import ImageAnnotations +from lcm_msgs.foxglove_msgs import SceneUpdate +from reactivex.observable import Observable +from dimos.agents2 import Agent, Output, Reducer, Stream, skill from dimos.core import In, Out, rpc -from dimos.msgs.geometry_msgs import Transform +from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Vector3 from dimos.msgs.sensor_msgs import Image, PointCloud2 -from dimos.msgs.vision_msgs import Detection2DArray, Detection3DArray -from dimos.perception.detection2d.module2D import Detection2DModule +from dimos.msgs.vision_msgs import Detection2DArray from dimos.perception.detection2d.module3D import Detection3DModule -from dimos.perception.detection2d.type import ( - Detection2D, - Detection3D, - ImageDetections2D, - ImageDetections3D, -) -from dimos.protocol.skill import skill +from dimos.perception.detection2d.type import Detection3D, ImageDetections3D, TableStr +from dimos.protocol.skill.skill import skill +from dimos.protocol.skill.type import Output, Reducer, Stream +from dimos.types.timestamped import to_datetime -class DetectionDBModule(Detection3DModule): - @rpc - def start(self): - super().start() - self.pointcloud_stream().subscribe(self.add_detections) +# Represents an object in space, as collection of 3d detections over time +class Object3D(Detection3D): + best_detection: Detection3D = None + center: Vector3 = None + track_id: str = None + detections: 
List[Detection3D] + + def to_repr_dict(self) -> Dict[str, Any]: + return { + "object_id": self.track_id, + "detections": len(self.detections), + "center": "[" + ", ".join(list(map(lambda n: f"{n:1f}", self.center.to_list()))) + "]", + } + + def __init__(self, track_id: str, detection: Optional[Detection3D] = None, *args, **kwargs): + if detection is None: + self.detections = [] + return + self.ts = detection.ts + self.track_id = track_id + self.class_id = detection.class_id + self.name = detection.name + self.confidence = detection.confidence + self.pointcloud = detection.pointcloud + self.bbox = detection.bbox + self.transform = detection.transform + self.center = detection.center + self.frame_id = detection.frame_id + self.detections = [detection] + self.best_detection = detection + + def __add__(self, detection: Detection3D) -> "Object3D": + new_object = Object3D(self.track_id) + new_object.bbox = detection.bbox + new_object.confidence = max(self.confidence, detection.confidence) + new_object.ts = max(self.ts, detection.ts) + new_object.track_id = self.track_id + new_object.class_id = self.class_id + new_object.name = self.name + new_object.transform = self.transform + new_object.pointcloud = self.pointcloud + detection.pointcloud + new_object.frame_id = self.frame_id + new_object.center = (self.center + detection.center) / 2 + new_object.detections = self.detections + [detection] + + if detection.bbox_2d_volume() > self.bbox_2d_volume(): + new_object.best_detection = detection + else: + new_object.best_detection = self.best_detection + + return new_object + + @property + def image(self) -> Image: + return self.best_detection.image + + def scene_entity_label(self) -> str: + return f"{self.name} ({len(self.detections)})" + + def agent_encode(self): + return { + "id": self.track_id, + "name": self.name, + "detections": len(self.detections), + "last_seen": f"{round((time.time() - self.ts))}s ago", + # "position": self.to_pose().position.agent_encode(), + } + + def 
to_pose(self) -> PoseStamped: + optical_inverse = Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=Quaternion(-0.5, 0.5, -0.5, 0.5), + frame_id="camera_link", + child_frame_id="camera_optical", + ).inverse() + + print("transform is", self.best_detection.transform) + + global_transform = optical_inverse + self.best_detection.transform + + print("inverse optical is", global_transform) + + print("obj center is", self.center) + global_pose = global_transform.to_pose() + print("Global pose:", global_pose) + global_pose.frame_id = self.best_detection.frame_id + print("remap to", self.best_detection.frame_id) + return PoseStamped( + position=self.center, orientation=Quaternion(), frame_id=self.best_detection.frame_id + ) + - def add_detections(self, detections: List[Detection3DArray]): - for det in detections: - self.add_detection(det) +class ObjectDBModule(Detection3DModule, TableStr): + cnt: int = 0 + objects: dict[str, Object3D] + object_stream: Observable[Object3D] = None + + goto: Callable[[PoseStamped], Any] = None + + image: In[Image] = None # type: ignore + pointcloud: In[PointCloud2] = None # type: ignore + + detections: Out[Detection2DArray] = None # type: ignore + annotations: Out[ImageAnnotations] = None # type: ignore + + detected_pointcloud_0: Out[PointCloud2] = None # type: ignore + detected_pointcloud_1: Out[PointCloud2] = None # type: ignore + detected_pointcloud_2: Out[PointCloud2] = None # type: ignore + + detected_image_0: Out[Image] = None # type: ignore + detected_image_1: Out[Image] = None # type: ignore + detected_image_2: Out[Image] = None # type: ignore + + scene_update: Out[SceneUpdate] = None # type: ignore + + target: Out[PoseStamped] = None # type: ignore + + remembered_locations: Dict[str, PoseStamped] + + def __init__(self, goto: Callable[[PoseStamped], Any], *args, **kwargs): + super().__init__(*args, **kwargs) + self.goto = goto + self.objects = {} + self.remembered_locations = {} + + def closest_object(self, detection: 
Detection3D) -> Optional[Object3D]: + # Filter objects to only those with matching names + matching_objects = [obj for obj in self.objects.values() if obj.name == detection.name] + + if not matching_objects: + return None + + # Sort by distance + distances = sorted(matching_objects, key=lambda obj: detection.center.distance(obj.center)) + + return distances[0] + + def add_detections(self, detections: List[Detection3D]) -> List[Object3D]: + return [ + detection for detection in map(self.add_detection, detections) if detection is not None + ] def add_detection(self, detection: Detection3D): - print("DETECTION", detection) + """Add detection to existing object or create new one.""" + closest = self.closest_object(detection) + if closest and closest.bounding_box_intersects(detection): + return self.add_to_object(closest, detection) + else: + return self.create_new_object(detection) + + def add_to_object(self, closest: Object3D, detection: Detection3D): + new_object = closest + detection + self.objects[closest.track_id] = new_object + return new_object + + def create_new_object(self, detection: Detection3D): + new_object = Object3D(f"obj_{self.cnt}", detection) + self.objects[new_object.track_id] = new_object + self.cnt += 1 + return new_object + + def agent_encode(self) -> List[Any]: + ret = [] + for obj in copy(self.objects).values(): + # we need at least 3 detectieons to consider it a valid object + # for this to be serious we need a ratio of detections within the window of observations + # if len(obj.detections) < 3: + # continue + ret.append(str(obj.agent_encode())) + if not ret: + return "No objects detected yet." 
+ return "\n".join(ret) + + def vlm_query(self, description: str) -> str: + imageDetections2D = super().vlm_query(description) + print("VLM query found", imageDetections2D, "detections") + time.sleep(3) + + if not imageDetections2D.detections: + return None + + ret = [] + for obj in self.objects.values(): + if obj.ts != imageDetections2D.ts: + print( + "Skipping", + obj.track_id, + "ts", + obj.ts, + "!=", + imageDetections2D.ts, + ) + continue + if obj.class_id != -100: + continue + if obj.name != imageDetections2D.detections[0].name: + print("Skipping", obj.name, "!=", imageDetections2D.detections[0].name) + continue + ret.append(obj) + ret.sort(key=lambda x: x.ts) + + return ret[0] if ret else None + + @skill() + def remember_location(self, name: str) -> str: + """Remember the current location with a name.""" + transform = self.tf.get("map", "sensor", time_point=time.time(), time_tolerance=1.0) + if not transform: + return f"Could not get current location transform from map to sensor" + + pose = transform.to_pose() + pose.frame_id = "map" + self.remembered_locations[name] = pose + return f"Location '{name}' saved at position: {pose.position}" + + @skill() + def goto_remembered_location(self, name: str) -> str: + """Go to a remembered location by name.""" + pose = self.remembered_locations.get(name, None) + if not pose: + return f"Location {name} not found. 
Known locations: {list(self.remembered_locations.keys())}" + self.goto(pose) + return f"Navigating to remembered location {name} and pose {pose}" + + @skill() + def list_remembered_locations(self) -> List[str]: + """List all remembered locations.""" + return str(list(self.remembered_locations.keys())) + + def nav_to(self, target_pose) -> str: + target_pose.orientation = Quaternion(0.0, 0.0, 0.0, 0.0) + self.target.publish(target_pose) + time.sleep(0.1) + self.target.publish(target_pose) + self.goto(target_pose) + + @skill() + def navigate_to_object_in_view(self, query: str) -> str: + """Navigate to an object in your current image view via natural language query using vision-language model to find it.""" + target_obj = self.vlm_query(query) + if not target_obj: + return f"No objects found matching '{query}'" + return self.navigate_to_object_by_id(target_obj.track_id) + + @skill(reducer=Reducer.all) + def list_objects(self): + """List all detected objects that the system remembers and can navigate to.""" + data = self.agent_encode() + return data + + @skill() + def navigate_to_object_by_id(self, object_id: str): + """Navigate to an object by an object id""" + target_obj = self.objects.get(object_id, None) + if not target_obj: + return f"Object {object_id} not found\nHere are the known objects:\n{str(self.agent_encode())}" + target_pose = target_obj.to_pose() + target_pose.frame_id = "map" + self.target.publish(target_pose) + time.sleep(0.1) + self.target.publish(target_pose) + self.nav_to(target_pose) + return f"Navigating to f{object_id} f{target_obj.name}" def lookup(self, label: str) -> List[Detection3D]: """Look up a detection by label.""" return [] + + @rpc + def start(self): + super().start() + + def update_objects(imageDetections: ImageDetections3D): + for detection in imageDetections.detections: + # print(detection) + return self.add_detection(detection) + + def scene_thread(): + while True: + scene_update = self.to_foxglove_scene_update() + 
self.scene_update.publish(scene_update) + time.sleep(1.0) + + threading.Thread(target=scene_thread, daemon=True).start() + + self.detection_stream_3d.subscribe(update_objects) + + def goto_object(self, object_id: str) -> Optional[Object3D]: + """Go to object by id.""" + return self.objects.get(object_id, None) + + def to_foxglove_scene_update(self) -> "SceneUpdate": + """Convert all detections to a Foxglove SceneUpdate message. + + Returns: + SceneUpdate containing SceneEntity objects for all detections + """ + + # Create SceneUpdate message with all detections + scene_update = SceneUpdate() + scene_update.deletions_length = 0 + scene_update.deletions = [] + scene_update.entities = [] + + for obj in copy(self.objects).values(): + # we need at least 3 detectieons to consider it a valid object + # for this to be serious we need a ratio of detections within the window of observations + if obj.class_id != -100 and len(obj.detections) < 3: + continue + + # print( + # f"Object {obj.track_id}: {len(obj.detections)} detections, confidence {obj.confidence}" + # ) + # print(obj.to_pose()) + + scene_update.entities.append( + obj.to_foxglove_scene_entity( + entity_id=f"object_{obj.name}_{obj.track_id}_{len(obj.detections)}" + ) + ) + + scene_update.entities_length = len(scene_update.entities) + return scene_update + + def __len__(self): + return len(self.objects.values()) + + def __iter__(self): + return iter(self.detections.values()) + + @skill(output=Output.image) + def observe(self) -> Optional[Image]: + """Returns the latest camera frame. Use this skill for any visual world queries. + + This skill provides the current camera view for perception tasks. + Returns None if no frame has been captured yet. 
+ """ + return self.image.get_next() diff --git a/dimos/perception/detection2d/moduleEmbeddingDB.py b/dimos/perception/detection2d/moduleEmbeddingDB.py new file mode 100644 index 0000000000..0e4bcd28ee --- /dev/null +++ b/dimos/perception/detection2d/moduleEmbeddingDB.py @@ -0,0 +1,22 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import threading +import time +from typing import Any, Dict, List, Optional + +from dimos.perception.detection2d.type import ( + Detection3D, +) +from dimos.perception.detection2d.type.detection3d import Detection3D diff --git a/dimos/perception/detection2d/spatial.py b/dimos/perception/detection2d/spatial.py new file mode 100644 index 0000000000..4d402d5a1e --- /dev/null +++ b/dimos/perception/detection2d/spatial.py @@ -0,0 +1,23 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from dimos.core.module import Module +from dimos.msgs.sensor_msgs import PointCloud2 +from dimos.perception.detection2d.type import Detection2D + + +class ObjectLocalization(Module): + def add_detection(self, detection: Detection2D, pointcloud: PointCloud2): + name = detection.name + print("DETECTION", name, pointcloud) diff --git a/dimos/perception/detection2d/test_module.py b/dimos/perception/detection2d/test_module.py deleted file mode 100644 index 51ab6384a4..0000000000 --- a/dimos/perception/detection2d/test_module.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import pytest -from dimos_lcm.foxglove_msgs.ImageAnnotations import ( - ImageAnnotations, -) -from dimos_lcm.sensor_msgs import Image, PointCloud2 - -from dimos.core import LCMTransport -from dimos.msgs.geometry_msgs import PoseStamped, Transform, Vector3 -from dimos.msgs.nav_msgs import OccupancyGrid -from dimos.msgs.sensor_msgs import PointCloud2 as PointCloud2Msg -from dimos.msgs.vision_msgs import Detection2DArray -from dimos.perception.detection2d.conftest import Moment -from dimos.perception.detection2d.module2D import Detection2DModule -from dimos.perception.detection2d.module3D import Detection3DModule -from dimos.perception.detection2d.type import ( - Detection2D, - Detection3D, - ImageDetections2D, - ImageDetections3D, -) -from dimos.robot.unitree_webrtc.modular import deploy_connection, deploy_navigation -from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule -from dimos.robot.unitree_webrtc.type.lidar import LidarMessage -from dimos.robot.unitree_webrtc.type.map import Map - - -def test_module2d(moment: Moment, publish_lcm): - detections2d = Detection2DModule().process_image_frame(moment["image_frame"]) - - print(detections2d) - - # Assertions for test_module2d - assert isinstance(detections2d, ImageDetections2D) - assert len(detections2d) == 1 - assert detections2d.image.ts == pytest.approx(1757960670.490248) - assert detections2d.image.shape == (720, 1280, 3) - assert detections2d.image.frame_id == "camera_optical" - - # Check first detection - det = detections2d.detections[0] - assert isinstance(det, Detection2D) - assert det.name == "suitcase" - assert det.class_id == 28 - assert det.track_id == 1 - assert det.confidence == pytest.approx(0.8145349025726318) - - # Check bbox values - assert det.bbox == pytest.approx( - [503.437255859375, 249.89385986328125, 655.950439453125, 469.82879638671875] - ) - - annotations = detections2d.to_image_annotations() - publish_lcm({"annotations": annotations, **moment}) - - -def 
test_module3d(moment: Moment, publish_lcm): - detections2d = Detection2DModule().process_image_frame(moment["image_frame"]) - pointcloud = moment["lidar_frame"] - camera_transform = moment["tf"].get("camera_optical", "world") - if camera_transform is None: - raise ValueError("No camera_optical transform in tf") - annotations = detections2d.to_image_annotations() - - detections3d = Detection3DModule(camera_info=moment["camera_info"]).process_frame( - detections2d, pointcloud, camera_transform - ) - - publish_lcm( - { - **moment, - "annotations": annotations, - "detections": detections3d, - } - ) - - print(detections3d) - - # Assertions for test_module3d - assert isinstance(detections3d, ImageDetections3D) - assert len(detections3d) == 1 - assert detections3d.image.ts == pytest.approx(1757960670.490248) - assert detections3d.image.shape == (720, 1280, 3) - assert detections3d.image.frame_id == "camera_optical" - - # Check first 3D detection - det = detections3d.detections[0] - assert isinstance(det, Detection3D) - assert det.name == "suitcase" - assert det.class_id == 28 - assert det.track_id == 1 - - assert det.confidence == pytest.approx(0.8145349025726318) - - # Check bbox values (should match 2D) - assert det.bbox == pytest.approx( - [503.437255859375, 249.89385986328125, 655.950439453125, 469.82879638671875] - ) - - # 3D-specific assertions - assert isinstance(det.pointcloud, PointCloud2Msg) - assert len(det.pointcloud) == 81 - assert det.pointcloud.frame_id == "world" - assert isinstance(det.transform, Transform) - - # Check center - center = det.center - assert isinstance(center, Vector3) - # Values from output: Vector([ -3.3565 -0.26265 0.18549]) - assert center.x == pytest.approx(-3.3565, abs=1e-4) - assert center.y == pytest.approx(-0.26265, abs=1e-4) - assert center.z == pytest.approx(0.18549, abs=1e-4) - - # Check pose - pose = det.pose - assert isinstance(pose, PoseStamped) - assert pose.frame_id == "world" - assert pose.ts == det.ts - - # Check repr 
dict values - repr_dict = det.to_repr_dict() - assert repr_dict["dist"] == "0.88m" - assert repr_dict["points"] == "81" - - -@pytest.mark.tool -def test_module3d_replay(dimos_cluster): - connection = deploy_connection(dimos_cluster, loop=False, speed=1.0) - # mapper = deploy_navigation(dimos_cluster, connection) - mapper = dimos_cluster.deploy( - Map, voxel_size=0.5, cost_resolution=0.05, global_publish_interval=1.0 - ) - mapper.lidar.connect(connection.lidar) - mapper.global_map.transport = LCMTransport("/global_map", LidarMessage) - mapper.global_costmap.transport = LCMTransport("/global_costmap", OccupancyGrid) - mapper.local_costmap.transport = LCMTransport("/local_costmap", OccupancyGrid) - - mapper.start() - - module3D = dimos_cluster.deploy(Detection3DModule, camera_info=ConnectionModule._camera_info()) - - module3D.image.connect(connection.video) - module3D.pointcloud.connect(connection.lidar) - - module3D.annotations.transport = LCMTransport("/annotations", ImageAnnotations) - module3D.detections.transport = LCMTransport("/detections", Detection2DArray) - - module3D.detected_pointcloud_0.transport = LCMTransport("/detected/pointcloud/0", PointCloud2) - module3D.detected_pointcloud_1.transport = LCMTransport("/detected/pointcloud/1", PointCloud2) - module3D.detected_pointcloud_2.transport = LCMTransport("/detected/pointcloud/2", PointCloud2) - - module3D.detected_image_0.transport = LCMTransport("/detected/image/0", Image) - module3D.detected_image_1.transport = LCMTransport("/detected/image/1", Image) - module3D.detected_image_2.transport = LCMTransport("/detected/image/2", Image) - - module3D.start() - connection.start() - import time - - while True: - time.sleep(1) diff --git a/dimos/perception/detection2d/test_module2d.py b/dimos/perception/detection2d/test_module2d.py new file mode 100644 index 0000000000..0786f14372 --- /dev/null +++ b/dimos/perception/detection2d/test_module2d.py @@ -0,0 +1,62 @@ +# Copyright 2025 Dimensional Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from unittest.mock import patch + +from reactivex import of + +from dimos.hardware.camera import zed +from dimos.msgs.geometry_msgs import ( + Quaternion, + Transform, + Vector3, +) +from dimos.perception.detection2d import testing +from dimos.perception.detection2d.module2D import Detection2DModule +from dimos.perception.detection2d.module3D import Detection3DModule +from dimos.perception.detection2d.moduleDB import ObjectDBModule + + +def test_module2d_visual_query(): + moments = [] + for i in range(2): + seek_value = 3.0 + (i * 2) + moments.append(testing.get_moment(seek=seek_value, g1=True)) + + test_image_stream = of(*[m["image_frame"] for m in moments]) + module2d = Detection2DModule() + module3d = Detection3DModule(camera_info=zed.CameraInfo.SingleWebcam) + objectdb = ObjectDBModule(goto=print, camera_info=zed.CameraInfo.SingleWebcam) + cam_transform = Transform( + translation=Vector3(0.05, 0.0, 0.0), + rotation=Quaternion.from_euler(Vector3(0.0, 0.2, 0.0)), + frame_id="sensor", + child_frame_id="camera_link", + ) + + with patch.object(module2d, "sharp_image_stream", return_value=test_image_stream): + moment = moments[0] + testing.publish_moment(moment) + detections = module2d.vlm_query("a guy, laptop, laptop") + moment["detections2d"] = detections + + tf = moment["tf"] + tf.receive_transform(cam_transform) + camera_transform = tf.get("camera_optical", moment.get("lidar_frame").frame_id) + detections3d = 
module3d.process_frame(detections, moment["lidar_frame"], camera_transform) + moment["detections3d"] = detections3d + + objectdb.add_detections(detections3d) + + moment["objectdb"] = objectdb + testing.publish_moment(moment) diff --git a/dimos/perception/detection2d/test_module3d.py b/dimos/perception/detection2d/test_module3d.py new file mode 100644 index 0000000000..09ae53a397 --- /dev/null +++ b/dimos/perception/detection2d/test_module3d.py @@ -0,0 +1,52 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import time + +import pytest +from dimos_lcm.foxglove_msgs import ImageAnnotations, SceneUpdate +from dimos_lcm.sensor_msgs import Image, PointCloud2 + +from dimos.core import LCMTransport +from dimos.msgs.geometry_msgs import PoseStamped, Transform, Vector3 +from dimos.msgs.nav_msgs import OccupancyGrid +from dimos.msgs.sensor_msgs import PointCloud2 as PointCloud2Msg +from dimos.msgs.vision_msgs import Detection2DArray +from dimos.perception.detection2d import testing +from dimos.perception.detection2d.module2D import Detection2DModule +from dimos.perception.detection2d.module3D import Detection3DModule +from dimos.perception.detection2d.moduleDB import ObjectDBModule +from dimos.perception.detection2d.type import ( + Detection2D, + Detection3D, + ImageDetections2D, + ImageDetections3D, +) +from dimos.protocol.service import lcmservice as lcm +from dimos.robot.unitree_webrtc.modular import deploy_connection, deploy_navigation +from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule +from dimos.robot.unitree_webrtc.type.lidar import LidarMessage +from dimos.robot.unitree_webrtc.type.map import Map + + +# def test_module3d(): +# lcm.autoconf() + +# for i in range(120): +# seek_value = 10.0 + (i / 2) +# moment = testing.detections3d(seek=seek_value) + +# testing.publish_moment(moment) +# testing.publish_moment(moment) + +# time.sleep(0.1) diff --git a/dimos/perception/detection2d/test_moduleDB.py b/dimos/perception/detection2d/test_moduleDB.py new file mode 100644 index 0000000000..081d3343b4 --- /dev/null +++ b/dimos/perception/detection2d/test_moduleDB.py @@ -0,0 +1,61 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time + +from lcm_msgs.foxglove_msgs import SceneUpdate + +from dimos.core import LCMTransport +from dimos.msgs.foxglove_msgs import ImageAnnotations +from dimos.msgs.geometry_msgs import PoseStamped +from dimos.msgs.sensor_msgs import Image, PointCloud2 +from dimos.msgs.vision_msgs import Detection2DArray +from dimos.perception.detection2d import testing +from dimos.perception.detection2d.moduleDB import ObjectDBModule +from dimos.protocol.service import lcmservice as lcm +from dimos.robot.unitree_webrtc.modular import deploy_connection, deploy_navigation +from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule + + +def test_moduleDB(dimos_cluster): + connection = deploy_connection(dimos_cluster) + + moduleDB = dimos_cluster.deploy( + ObjectDBModule, + camera_info=ConnectionModule._camera_info(), + goto=lambda obj_id: print(f"Going to {obj_id}"), + ) + moduleDB.image.connect(connection.video) + moduleDB.pointcloud.connect(connection.lidar) + + moduleDB.annotations.transport = LCMTransport("/annotations", ImageAnnotations) + moduleDB.detections.transport = LCMTransport("/detections", Detection2DArray) + + moduleDB.detected_pointcloud_0.transport = LCMTransport("/detected/pointcloud/0", PointCloud2) + moduleDB.detected_pointcloud_1.transport = LCMTransport("/detected/pointcloud/1", PointCloud2) + moduleDB.detected_pointcloud_2.transport = LCMTransport("/detected/pointcloud/2", PointCloud2) + + moduleDB.detected_image_0.transport = LCMTransport("/detected/image/0", Image) + moduleDB.detected_image_1.transport = 
LCMTransport("/detected/image/1", Image) + moduleDB.detected_image_2.transport = LCMTransport("/detected/image/2", Image) + + moduleDB.scene_update.transport = LCMTransport("/scene_update", SceneUpdate) + moduleDB.target.transport = LCMTransport("/target", PoseStamped) + + connection.start() + moduleDB.start() + + time.sleep(4) + print("STARTING QUERY!!") + print("VLM RES", moduleDB.navigate_to_object_in_view("white floor")) + time.sleep(30) diff --git a/dimos/perception/detection2d/test_replay.py b/dimos/perception/detection2d/test_replay.py new file mode 100644 index 0000000000..608375fe27 --- /dev/null +++ b/dimos/perception/detection2d/test_replay.py @@ -0,0 +1,53 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import time + +import pytest +from dimos_lcm.foxglove_msgs import ImageAnnotations, SceneUpdate +from dimos_lcm.sensor_msgs import Image, PointCloud2 + +from dimos.core import LCMTransport +from dimos.msgs.geometry_msgs import PoseStamped, Transform, Vector3 +from dimos.msgs.nav_msgs import OccupancyGrid +from dimos.msgs.sensor_msgs import PointCloud2 as PointCloud2Msg +from dimos.msgs.vision_msgs import Detection2DArray +from dimos.perception.detection2d import testing +from dimos.perception.detection2d.module2D import Detection2DModule +from dimos.perception.detection2d.module3D import Detection3DModule +from dimos.perception.detection2d.moduleDB import ObjectDBModule +from dimos.perception.detection2d.type import ( + Detection2D, + Detection3D, + ImageDetections2D, + ImageDetections3D, +) +from dimos.protocol.service import lcmservice as lcm +from dimos.robot.unitree_webrtc.modular import deploy_connection, deploy_navigation +from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule +from dimos.robot.unitree_webrtc.type.lidar import LidarMessage +from dimos.robot.unitree_webrtc.type.map import Map + + +@pytest.mark.tool +def test_module3d(): + lcm.autoconf() + + for i in range(120): + seek_value = 10.0 + (i / 2) + moment = testing.objectdb(seek=seek_value) + + testing.publish_moment(moment) + testing.publish_moment(moment) + + time.sleep(0.1) diff --git a/dimos/perception/detection2d/test_type.py b/dimos/perception/detection2d/test_type.py deleted file mode 100644 index 25264ef727..0000000000 --- a/dimos/perception/detection2d/test_type.py +++ /dev/null @@ -1,239 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dimos.perception.detection2d.conftest import detections2d, detections3d -from dimos.perception.detection2d.type import ( - Detection2D, - Detection3D, - ImageDetections2D, - ImageDetections3D, -) -from dimos.msgs.sensor_msgs import PointCloud2 -from dimos.msgs.geometry_msgs import Transform, Vector3, PoseStamped - - -def test_detections2d(detections2d): - print(f"\n=== ImageDetections2D Test ===") - print(f"Type: {type(detections2d)}") - print(f"Number of detections: {len(detections2d)}") - print(f"Image timestamp: {detections2d.image.ts}") - print(f"Image shape: {detections2d.image.shape}") - print(f"Image frame_id: {detections2d.image.frame_id}") - - print(f"\nFull detections object:") - print(detections2d) - - # Basic type assertions - assert isinstance(detections2d, ImageDetections2D) - assert hasattr(detections2d, "image") - assert hasattr(detections2d, "detections") - - # Image assertions - assert detections2d.image is not None - assert detections2d.image.ts == 1757960670.490248 - assert detections2d.image.shape == (720, 1280, 3) - assert detections2d.image.frame_id == "camera_optical" - - # Detection count assertions - assert len(detections2d) == 1 - assert isinstance(detections2d.detections, list) - assert len(detections2d.detections) == 1 - - # Test first detection with literal checks - det = detections2d.detections[0] - print(f"\n--- Detection 0 (literal checks) ---") - print(f"Type: {type(det)}") - print(f"Name: {det.name}") - print(f"Class ID: {det.class_id}") - print(f"Track ID: {det.track_id}") - print(f"Confidence: {det.confidence}") 
- print(f"Bbox: {det.bbox}") - print(f"Timestamp: {det.ts}") - - # Detection type assertions - assert isinstance(det, Detection2D) - - # Literal value assertions - assert det.name == "suitcase" - assert det.class_id == 28 # COCO class 28 is suitcase - assert det.track_id == 1 - assert 0.814 < det.confidence < 0.815 # Allow small floating point variance - - # Data type assertions - assert isinstance(det.name, str) - assert isinstance(det.class_id, int) - assert isinstance(det.track_id, int) - assert isinstance(det.confidence, float) - assert isinstance(det.bbox, (tuple, list)) and len(det.bbox) == 4 - assert isinstance(det.ts, float) - - # Bbox literal checks (with tolerance for float precision) - x1, y1, x2, y2 = det.bbox - assert 503.4 < x1 < 503.5 - assert 249.8 < y1 < 250.0 - assert 655.9 < x2 < 656.0 - assert 469.8 < y2 < 470.0 - - # Bbox format assertions - assert all(isinstance(coord, (int, float)) for coord in det.bbox) - assert x2 > x1, f"x2 ({x2}) should be greater than x1 ({x1})" - assert y2 > y1, f"y2 ({y2}) should be greater than y1 ({y1})" - assert x1 >= 0 and y1 >= 0, "Bbox coordinates should be non-negative" - - # Bbox width/height checks - width = x2 - x1 - height = y2 - y1 - assert 152.0 < width < 153.0 # Expected width ~152.5 - assert 219.0 < height < 221.0 # Expected height ~219.9 - - # Confidence assertions - assert 0.0 <= det.confidence <= 1.0, ( - f"Confidence should be between 0 and 1, got {det.confidence}" - ) - - # Image reference assertion - assert det.image is detections2d.image, "Detection should reference the same image" - - # Timestamp consistency - assert det.ts == detections2d.image.ts - assert det.ts == 1757960670.490248 - - -def test_detections3d(detections3d): - print(f"\n=== ImageDetections3D Test ===") - print(f"Type: {type(detections3d)}") - print(f"Number of detections: {len(detections3d)}") - print(f"Image timestamp: {detections3d.image.ts}") - print(f"Image shape: {detections3d.image.shape}") - print(f"Image frame_id: 
{detections3d.image.frame_id}") - - print(f"\nFull detections object:") - print(detections3d) - - # Basic type assertions - assert isinstance(detections3d, ImageDetections3D) - assert hasattr(detections3d, "image") - assert hasattr(detections3d, "detections") - - # Image assertions - assert detections3d.image is not None - assert detections3d.image.ts == 1757960670.490248 - assert detections3d.image.shape == (720, 1280, 3) - assert detections3d.image.frame_id == "camera_optical" - - # Detection count assertions - assert len(detections3d) == 1 - assert isinstance(detections3d.detections, list) - assert len(detections3d.detections) == 1 - - # Test first 3D detection with literal checks - det = detections3d.detections[0] - print(f"\n--- Detection3D 0 (literal checks) ---") - print(f"Type: {type(det)}") - print(f"Name: {det.name}") - print(f"Class ID: {det.class_id}") - print(f"Track ID: {det.track_id}") - print(f"Confidence: {det.confidence}") - print(f"Bbox: {det.bbox}") - print(f"Timestamp: {det.ts}") - print(f"Has pointcloud: {hasattr(det, 'pointcloud')}") - print(f"Has transform: {hasattr(det, 'transform')}") - if hasattr(det, "pointcloud"): - print(f"Pointcloud points: {len(det.pointcloud)}") - print(f"Pointcloud frame_id: {det.pointcloud.frame_id}") - - # Detection type assertions - assert isinstance(det, Detection3D) - - # Detection3D should have all Detection2D fields plus pointcloud and transform - assert hasattr(det, "bbox") - assert hasattr(det, "track_id") - assert hasattr(det, "class_id") - assert hasattr(det, "confidence") - assert hasattr(det, "name") - assert hasattr(det, "ts") - assert hasattr(det, "image") - assert hasattr(det, "pointcloud") - assert hasattr(det, "transform") - - # Literal value assertions (should match Detection2D) - assert det.name == "suitcase" - assert det.class_id == 28 # COCO class 28 is suitcase - assert det.track_id == 1 - assert 0.814 < det.confidence < 0.815 # Allow small floating point variance - - # Data type assertions - 
assert isinstance(det.name, str) - assert isinstance(det.class_id, int) - assert isinstance(det.track_id, int) - assert isinstance(det.confidence, float) - assert isinstance(det.bbox, (tuple, list)) and len(det.bbox) == 4 - assert isinstance(det.ts, float) - - # Bbox literal checks (should match Detection2D) - x1, y1, x2, y2 = det.bbox - assert 503.4 < x1 < 503.5 - assert 249.8 < y1 < 250.0 - assert 655.9 < x2 < 656.0 - assert 469.8 < y2 < 470.0 - - # 3D-specific assertions - assert isinstance(det.pointcloud, PointCloud2) - assert isinstance(det.transform, Transform) - - # Pointcloud assertions - assert len(det.pointcloud) == 81 # Based on the output we saw - assert det.pointcloud.frame_id == "world" # Pointcloud should be in world frame - - # Test center calculation - center = det.center - print(f"\nDetection center: {center}") - assert isinstance(center, Vector3) - assert hasattr(center, "x") - assert hasattr(center, "y") - assert hasattr(center, "z") - - # Test pose property - pose = det.pose - print(f"Detection pose: {pose}") - assert isinstance(pose, PoseStamped) - assert pose.frame_id == "world" - assert pose.ts == det.ts - assert pose.position == center # Pose position should match center - - # Check distance calculation (from to_repr_dict) - repr_dict = det.to_repr_dict() - print(f"\nRepr dict: {repr_dict}") - assert "dist" in repr_dict - assert repr_dict["dist"] == "0.88m" # Based on the output - assert repr_dict["points"] == "81" - assert repr_dict["name"] == "suitcase" - assert repr_dict["class"] == "28" - assert repr_dict["track"] == "1" - - # Image reference assertion - assert det.image is detections3d.image, "Detection should reference the same image" - - # Timestamp consistency - assert det.ts == detections3d.image.ts - assert det.ts == 1757960670.490248 - - -def test_detection3d_to_pose(detections3d): - det = detections3d[0] - pose = det.pose - - # Check that pose is valid - assert pose.frame_id == "world" - assert pose.ts == det.ts diff --git 
a/dimos/perception/detection2d/testing.py b/dimos/perception/detection2d/testing.py new file mode 100644 index 0000000000..0fe5b4adcd --- /dev/null +++ b/dimos/perception/detection2d/testing.py @@ -0,0 +1,238 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time +from typing import Optional, TypedDict, Union + +from dimos_lcm.foxglove_msgs.ImageAnnotations import ImageAnnotations +from dimos_lcm.foxglove_msgs.SceneUpdate import SceneUpdate +from dimos_lcm.visualization_msgs.MarkerArray import MarkerArray + +from dimos.core.transport import LCMTransport +from dimos.hardware.camera import zed +from dimos.msgs.geometry_msgs import Transform +from dimos.msgs.sensor_msgs import CameraInfo, PointCloud2 +from dimos.msgs.sensor_msgs.Image import Image +from dimos.msgs.tf2_msgs import TFMessage +from dimos.perception.detection2d.module2D import Detection2DModule +from dimos.perception.detection2d.module3D import Detection3DModule +from dimos.perception.detection2d.moduleDB import ObjectDBModule +from dimos.perception.detection2d.type import ImageDetections2D, ImageDetections3D +from dimos.protocol.tf import TF +from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule +from dimos.robot.unitree_webrtc.type.lidar import LidarMessage +from dimos.robot.unitree_webrtc.type.odometry import Odometry +from dimos.utils.data import get_data +from dimos.utils.testing import TimedSensorReplay + + +class Moment(TypedDict, 
total=False): + odom_frame: Odometry + lidar_frame: LidarMessage + image_frame: Image + camera_info: CameraInfo + transforms: list[Transform] + tf: TF + annotations: Optional[ImageAnnotations] + detections: Optional[ImageDetections3D] + markers: Optional[MarkerArray] + scene_update: Optional[SceneUpdate] + + +class Moment2D(Moment): + detections2d: ImageDetections2D + + +class Moment3D(Moment): + detections3d: ImageDetections3D + + +tf = TF() + + +def get_g1_moment(**kwargs): + seek = kwargs.get("seek", 10.0) + + data_dir = "replay_g1" + get_data(data_dir) + + lidar_frame = PointCloud2.lcm_decode( + TimedSensorReplay(f"{data_dir}/map#sensor_msgs.PointCloud2").find_closest_seek(seek) + ) + + tf_replay = TimedSensorReplay(f"{data_dir}/tf#tf2_msgs.TFMessage") + tf = TF() + tf.start() + + tf_window = 1.5 + for timestamp, tf_frame in tf_replay.iterate_ts(seek=seek - tf_window, duration=tf_window): + tf.publish(*TFMessage.lcm_decode(tf_frame).transforms) + + image_frame = Image.lcm_decode( + TimedSensorReplay(f"{data_dir}/image#sensor_msgs.Image").find_closest_seek(seek) + ) + + return { + "lidar_frame": lidar_frame, + "image_frame": image_frame, + "camera_info": zed.CameraInfo.SingleWebcam, + "tf": tf, + } + + +def get_moment(**kwargs) -> Moment: + seek = kwargs.get("seek", 10.0) + g1 = kwargs.get("g1", False) + if g1: + return get_g1_moment(**kwargs) + + data_dir = "unitree_go2_lidar_corrected" + get_data(data_dir) + + lidar_frame = TimedSensorReplay(f"{data_dir}/lidar").find_closest_seek(seek) + + image_frame = TimedSensorReplay( + f"{data_dir}/video", + ).find_closest(lidar_frame.ts) + + image_frame.frame_id = "camera_optical" + + odom_frame = TimedSensorReplay(f"{data_dir}/odom", autocast=Odometry.from_msg).find_closest( + lidar_frame.ts + ) + + transforms = ConnectionModule._odom_to_tf(odom_frame) + + tf.publish(*transforms) + + return { + "odom_frame": odom_frame, + "lidar_frame": lidar_frame, + "image_frame": image_frame, + "camera_info": 
ConnectionModule._camera_info(), + "transforms": transforms, + "tf": tf, + } + + +# Create a single instance of Detection2DModule +_detection2d_module = None +_objectdb_module = None + + +def detections2d(**kwargs) -> Moment2D: + global _detection2d_module + moment = get_moment(**kwargs) + if _detection2d_module is None: + _detection2d_module = Detection2DModule() + + return { + **moment, + "detections2d": _detection2d_module.process_image_frame(moment["image_frame"]), + } + + +# Create a single instance of Detection3DModule +_detection3d_module = None + + +def detections3d(**kwargs) -> Moment3D: + global _detection3d_module + + moment = detections2d(**kwargs) + + camera_transform = moment["tf"].get("camera_optical", moment.get("lidar_frame").frame_id) + if camera_transform is None: + raise ValueError("No camera_optical transform in tf") + + if _detection3d_module is None: + _detection3d_module = Detection3DModule(camera_info=moment["camera_info"]) + + return { + **moment, + "detections3d": _detection3d_module.process_frame( + moment["detections2d"], moment["lidar_frame"], camera_transform + ), + } + + +def objectdb(**kwargs) -> Moment3D: + global _objectdb_module + + moment = detections3d(**kwargs) + camera_transform = moment["tf"].get("camera_optical", moment.get("lidar_frame").frame_id) + if camera_transform is None: + raise ValueError("No camera_optical transform in tf") + + if _objectdb_module is None: + _objectdb_module = ObjectDBModule( + camera_info=moment["camera_info"], goto=lambda x: print("GOTO", x) + ) + + for detection in moment["detections3d"]: + _objectdb_module.add_detection(detection) + + return {**moment, "objectdb": _objectdb_module} + + +# Create transport instances once and reuse them +_transports = {} + + +def _get_transport(topic: str, msg_type): + """Get or create a transport for the given topic.""" + key = (topic, msg_type) + if key not in _transports: + _transports[key] = LCMTransport(topic, msg_type) + return _transports[key] + + +def 
publish_moment(moment: Union[Moment, Moment2D, Moment3D]): + lidar_frame_transport = _get_transport("/lidar", LidarMessage) + lidar_frame_transport.publish(moment.get("lidar_frame")) + + image_frame_transport = _get_transport("/image", Image) + image_frame_transport.publish(moment.get("image_frame")) + + camera_info_transport = _get_transport("/camera_info", CameraInfo) + camera_info_transport.publish(moment.get("camera_info")) + + odom_frame = moment.get("odom_frame") + if odom_frame: + odom_frame_transport = _get_transport("/odom", Odometry) + odom_frame_transport.publish(moment.get("odom_frame")) + + moment.get("tf").publish_all() + time.sleep(0.1) + moment.get("tf").publish_all() + + detections2d: ImageDetections2D = moment.get("detections2d") + if detections2d: + annotations_transport = _get_transport("/annotations", ImageAnnotations) + annotations_transport.publish(detections2d.to_foxglove_annotations()) + + detections3d: ImageDetections3D = moment.get("detections3d") + if detections3d: + for index, detection in enumerate(detections3d[:3]): + pointcloud_topic = _get_transport(f"/detected/pointcloud/{index}", PointCloud2) + image_topic = _get_transport(f"/detected/image/{index}", Image) + pointcloud_topic.publish(detection.pointcloud) + image_topic.publish(detection.cropped_image()) + + # scene_entity_transport = _get_transport("/scene_update", SceneUpdate) + # scene_entity_transport.publish(detections3d.to_foxglove_scene_update()) + + objectdb: ObjectDBModule = moment.get("objectdb") + if objectdb: + scene_entity_transport = _get_transport("/scene_update", SceneUpdate) + scene_entity_transport.publish(objectdb.to_foxglove_scene_update()) diff --git a/dimos/perception/detection2d/thread_monitor_report.csv b/dimos/perception/detection2d/thread_monitor_report.csv new file mode 100644 index 0000000000..6a2a449b89 --- /dev/null +++ b/dimos/perception/detection2d/thread_monitor_report.csv @@ -0,0 +1,210 @@ 
+timestamp,test_module,test_name,initial_threads,final_threads,thread_change,leaked_threads,new_thread_names,closed_thread_names,duration_seconds +2025-09-26T19:19:54.379192,dimos.perception.detection2d.test_module,test_module3d,2,2,0,0,,,0.001518 +2025-09-26T19:19:54.381618,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,8,6,6,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop),,2.885595 +2025-09-26T19:19:57.268310,dimos.perception.detection2d.test_yolo_2d_det,test_yolo_detector_initialization,8,8,0,0,,,0.001546 +2025-09-26T19:19:57.279497,dimos.perception.detection2d.test_yolo_2d_det,test_yolo_detector_process_image,8,15,7,7,ThreadPoolExecutor-1_0|ThreadPoolExecutor-1_1|ThreadPoolExecutor-1_2|ThreadPoolExecutor-1_3|ThreadPoolExecutor-1_4|ThreadPoolExecutor-1_5|ThreadPoolExecutor-1_6,,1.020645 +2025-09-26T19:19:58.545394,dimos.perception.detection2d.type.test_detection2d,test_detection_basic_properties,17,17,0,0,,,0.000793 +2025-09-26T19:19:58.547004,dimos.perception.detection2d.type.test_detection2d,test_bounding_box_format,17,17,0,0,,,0.001043 +2025-09-26T19:19:58.549070,dimos.perception.detection2d.type.test_detection2d,test_bbox_2d_volume,17,17,0,0,,,0.001233 +2025-09-26T19:19:58.550936,dimos.perception.detection2d.type.test_detection2d,test_bbox_center_calculation,17,17,0,0,,,0.000535 +2025-09-26T19:19:58.552076,dimos.perception.detection2d.type.test_detection2d,test_cropped_image,17,17,0,0,,,0.000551 +2025-09-26T19:19:58.553221,dimos.perception.detection2d.type.test_detection2d,test_to_ros_bbox,17,17,0,0,,,0.000537 +2025-09-26T19:19:58.554444,dimos.perception.detection2d.type.test_detection2d,test_to_text_annotation,17,17,0,0,,,0.000908 +2025-09-26T19:19:58.555955,dimos.perception.detection2d.type.test_detection2d,test_to_points_annotation,17,17,0,0,,,0.00064 
+2025-09-26T19:19:58.557180,dimos.perception.detection2d.type.test_detection2d,test_to_image_annotations,17,17,0,0,,,0.000532 +2025-09-26T19:19:58.558321,dimos.perception.detection2d.type.test_detection2d,test_to_repr_dict,17,17,0,0,,,0.000467 +2025-09-26T19:19:58.724952,dimos.perception.detection2d.type.test_detection2d,test_image_detections2d_properties,17,17,0,0,,,0.000854 +2025-09-26T19:19:58.726623,dimos.perception.detection2d.type.test_detection2d,test_ros_detection2d_conversion,17,17,0,0,,,0.000665 +2025-09-26T19:19:58.991369,dimos.perception.detection2d.type.test_detection3d,test_oriented_bounding_box,19,19,0,0,,,0.001368 +2025-09-26T19:19:58.993744,dimos.perception.detection2d.type.test_detection3d,test_bounding_box_dimensions,19,19,0,0,,,0.002378 +2025-09-26T19:19:58.997017,dimos.perception.detection2d.type.test_detection3d,test_axis_aligned_bounding_box,19,19,0,0,,,0.006685 +2025-09-26T19:19:59.004588,dimos.perception.detection2d.type.test_detection3d,test_point_cloud_properties,19,19,0,0,,,0.00556 +2025-09-26T19:19:59.011050,dimos.perception.detection2d.type.test_detection3d,test_foxglove_scene_entity_generation,19,19,0,0,,,0.000951 +2025-09-26T19:19:59.012833,dimos.perception.detection2d.type.test_detection3d,test_foxglove_cube_properties,19,19,0,0,,,0.000854 +2025-09-26T19:19:59.014499,dimos.perception.detection2d.type.test_detection3d,test_foxglove_text_label,19,19,0,0,,,0.00077 +2025-09-26T19:19:59.016055,dimos.perception.detection2d.type.test_detection3d,test_detection_pose,19,19,0,0,,,0.001504 +2025-09-26T19:19:59.018511,dimos.perception.detection2d.type.test_detection3d_filters,test_module3d,19,27,8,8,Thread-12 (_loop)|Thread-13 (_loop)|Thread-14 (_loop)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-19 (_loop),,1.046899 +2025-09-26T19:20:01.829132,dimos.perception.detection2d.type.test_object3d,test_object_db_module_populated,33,33,0,0,,,0.002848 
+2025-09-26T19:20:01.834098,dimos.perception.detection2d.type.test_object3d,test_object_db_module_objects_structure,33,33,0,0,,,0.000622 +2025-09-26T19:20:01.837222,dimos.perception.detection2d.type.test_object3d,test_object3d_properties,33,33,0,0,,,0.001442 +2025-09-26T19:20:01.840196,dimos.perception.detection2d.type.test_object3d,test_object3d_multiple_detections,33,33,0,0,,,0.000603 +2025-09-26T19:20:01.843165,dimos.perception.detection2d.type.test_object3d,test_object3d_center,33,33,0,0,,,0.001177 +2025-09-26T19:20:01.845789,dimos.perception.detection2d.type.test_object3d,test_object3d_repr_dict,33,33,0,0,,,0.001359 +2025-09-26T19:20:01.849114,dimos.perception.detection2d.type.test_object3d,test_object3d_scene_entity_label,33,33,0,0,,,0.001437 +2025-09-26T19:20:01.852122,dimos.perception.detection2d.type.test_object3d,test_object3d_agent_encode,33,33,0,0,,,0.000668 +2025-09-26T19:20:01.853653,dimos.perception.detection2d.type.test_object3d,test_object3d_image_property,33,33,0,0,,,0.001136 +2025-09-26T19:20:01.855540,dimos.perception.detection2d.type.test_object3d,test_object3d_addition,33,33,0,0,,,0.001418 +2025-09-26T19:20:01.857851,dimos.perception.detection2d.type.test_object3d,test_image_detections3d_scene_update,33,33,0,0,,,0.001056 +2025-09-26T19:20:21.743295,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,8,6,6,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop),,2.938933 +2025-09-26T19:20:24.683520,dimos.perception.detection2d.test_yolo_2d_det,test_yolo_detector_initialization,8,8,0,0,,,0.004531 +2025-09-26T19:20:24.688809,dimos.perception.detection2d.test_yolo_2d_det,test_yolo_detector_process_image,8,15,7,7,ThreadPoolExecutor-1_0|ThreadPoolExecutor-1_1|ThreadPoolExecutor-1_2|ThreadPoolExecutor-1_3|ThreadPoolExecutor-1_4|ThreadPoolExecutor-1_5|ThreadPoolExecutor-1_6,,1.017079 
+2025-09-26T19:20:25.966917,dimos.perception.detection2d.type.test_detection2d,test_detection_basic_properties,17,17,0,0,,,0.000825 +2025-09-26T19:20:25.968571,dimos.perception.detection2d.type.test_detection2d,test_bounding_box_format,17,17,0,0,,,0.000504 +2025-09-26T19:20:25.969670,dimos.perception.detection2d.type.test_detection2d,test_bbox_2d_volume,17,17,0,0,,,0.00111 +2025-09-26T19:20:25.971487,dimos.perception.detection2d.type.test_detection2d,test_bbox_center_calculation,17,17,0,0,,,0.000519 +2025-09-26T19:20:25.972710,dimos.perception.detection2d.type.test_detection2d,test_cropped_image,17,17,0,0,,,0.000546 +2025-09-26T19:20:25.973870,dimos.perception.detection2d.type.test_detection2d,test_to_ros_bbox,17,17,0,0,,,0.000509 +2025-09-26T19:20:25.974983,dimos.perception.detection2d.type.test_detection2d,test_to_text_annotation,17,17,0,0,,,0.00062 +2025-09-26T19:20:25.976220,dimos.perception.detection2d.type.test_detection2d,test_to_points_annotation,17,17,0,0,,,0.000592 +2025-09-26T19:20:25.977374,dimos.perception.detection2d.type.test_detection2d,test_to_image_annotations,17,17,0,0,,,0.000507 +2025-09-26T19:20:25.978560,dimos.perception.detection2d.type.test_detection2d,test_to_repr_dict,17,17,0,0,,,0.000461 +2025-09-26T19:20:26.149543,dimos.perception.detection2d.type.test_detection2d,test_image_detections2d_properties,17,17,0,0,,,0.000849 +2025-09-26T19:20:26.151239,dimos.perception.detection2d.type.test_detection2d,test_ros_detection2d_conversion,17,17,0,0,,,0.000629 +2025-09-26T19:20:26.399438,dimos.perception.detection2d.type.test_detection3d,test_oriented_bounding_box,19,19,0,0,,,0.001212 +2025-09-26T19:20:26.403431,dimos.perception.detection2d.type.test_detection3d,test_bounding_box_dimensions,19,19,0,0,,,0.002155 +2025-09-26T19:20:26.406459,dimos.perception.detection2d.type.test_detection3d,test_axis_aligned_bounding_box,19,19,0,0,,,0.00198 
+2025-09-26T19:20:26.409517,dimos.perception.detection2d.type.test_detection3d,test_point_cloud_properties,19,19,0,0,,,0.001458 +2025-09-26T19:20:26.411785,dimos.perception.detection2d.type.test_detection3d,test_foxglove_scene_entity_generation,19,19,0,0,,,0.000818 +2025-09-26T19:20:26.413347,dimos.perception.detection2d.type.test_detection3d,test_foxglove_cube_properties,19,19,0,0,,,0.000753 +2025-09-26T19:20:26.414814,dimos.perception.detection2d.type.test_detection3d,test_foxglove_text_label,19,19,0,0,,,0.000662 +2025-09-26T19:20:26.416170,dimos.perception.detection2d.type.test_detection3d,test_detection_pose,19,19,0,0,,,0.001394 +2025-09-26T19:20:26.418527,dimos.perception.detection2d.type.test_detection3d_filters,test_module3d,19,27,8,8,Thread-12 (_loop)|Thread-13 (_loop)|Thread-14 (_loop)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-19 (_loop),,1.036356 +2025-09-26T19:20:29.120195,dimos.perception.detection2d.type.test_object3d,test_object_db_module_populated,33,33,0,0,,,0.000819 +2025-09-26T19:20:29.124424,dimos.perception.detection2d.type.test_object3d,test_object_db_module_objects_structure,33,33,0,0,,,0.001596 +2025-09-26T19:20:29.127647,dimos.perception.detection2d.type.test_object3d,test_object3d_properties,33,33,0,0,,,0.00145 +2025-09-26T19:20:29.130688,dimos.perception.detection2d.type.test_object3d,test_object3d_multiple_detections,33,33,0,0,,,0.001471 +2025-09-26T19:20:29.133662,dimos.perception.detection2d.type.test_object3d,test_object3d_center,33,33,0,0,,,0.001336 +2025-09-26T19:20:29.136447,dimos.perception.detection2d.type.test_object3d,test_object3d_repr_dict,33,33,0,0,,,0.001453 +2025-09-26T19:20:29.139609,dimos.perception.detection2d.type.test_object3d,test_object3d_scene_entity_label,33,33,0,0,,,0.001608 +2025-09-26T19:20:29.142783,dimos.perception.detection2d.type.test_object3d,test_object3d_agent_encode,33,33,0,0,,,0.001384 
+2025-09-26T19:20:29.145739,dimos.perception.detection2d.type.test_object3d,test_object3d_image_property,33,33,0,0,,,0.0014 +2025-09-26T19:20:29.148600,dimos.perception.detection2d.type.test_object3d,test_object3d_addition,33,33,0,0,,,0.002081 +2025-09-26T19:20:29.152207,dimos.perception.detection2d.type.test_object3d,test_image_detections3d_scene_update,33,33,0,0,,,0.002095 +2025-09-26T20:21:19.162369,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,4,2,2,Thread-2 (run_forever)|Thread-3 (_loop),,0.046279 +2025-09-26T20:21:20.639690,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,4,2,2,Thread-2 (run_forever)|Thread-3 (_loop),,0.053032 +2025-09-26T20:21:30.053177,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,0.043413 +2025-09-26T20:21:38.089866,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,0.050572 +2025-09-26T20:21:38.288569,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,0.052931 +2025-09-26T20:22:04.531708,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,0.048774 +2025-09-26T20:22:07.135074,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,0.048978 +2025-09-26T20:22:18.495641,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,0.060841 +2025-09-26T20:23:09.921510,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,1.346129 +2025-09-26T20:24:47.931337,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,3.84527 
+2025-09-26T20:24:59.060263,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,0.053613 +2025-09-26T20:25:05.771544,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,3.892861 +2025-09-26T20:25:41.069615,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,4.202698 +2025-09-26T20:26:03.037005,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,1,0,0,,,0.036067 +2025-09-26T20:26:22.868376,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,5.63436 +2025-09-26T20:30:12.473288,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,7.350234 +2025-09-26T20:31:17.179166,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,4.087574 +2025-09-26T20:32:54.824655,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,5.661503 +2025-09-26T20:33:12.165843,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,5.482186 +2025-09-26T20:33:25.477659,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,11.251526 +2025-09-26T20:34:03.773972,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,10.599549 +2025-09-26T20:34:35.064512,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,5.609869 +2025-09-26T20:36:30.134349,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,0.319505 
+2025-09-26T20:36:34.464576,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,6.992168 +2025-09-26T20:39:38.267359,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,6.485351 +2025-09-26T20:41:18.218244,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,6.932784 +2025-09-26T20:41:40.645139,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,1,3,2,2,Thread-1 (run_forever)|Thread-2 (_loop),,6.255135 +2025-09-26T20:42:18.133300,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,8,6,6,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop),,2.151888 +2025-09-26T20:42:27.389096,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,8,6,6,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop),,2.144457 +2025-09-26T20:42:35.625436,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,8,6,6,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop),,2.115704 +2025-09-26T20:42:50.848395,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,12,10,10,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop),,2.845829 +2025-09-26T20:42:59.689297,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,12,10,10,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop),,2.892929 +2025-09-26T20:51:22.099995,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,12,10,10,Thread-2 
(run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop),,2.774706 +2025-09-26T20:51:33.454931,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,4,2,2,Thread-2 (run_forever)|Thread-3 (_loop),,0.058218 +2025-09-26T20:51:49.664137,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,4,2,2,Thread-2 (run_forever)|Thread-3 (_loop),,0.120992 +2025-09-26T20:52:43.699422,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,4,2,2,Thread-2 (run_forever)|Thread-3 (_loop),,0.118322 +2025-09-26T20:52:58.399305,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,4,2,2,Thread-2 (run_forever)|Thread-3 (_loop),,0.49951 +2025-09-26T20:54:22.530651,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,10,8,8,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 (run_forever)|Thread-6 (_loop)|Thread-7 (run)|ThreadPoolExecutor-1_0|ThreadPoolExecutor-1_1|ThreadPoolExecutor-1_2,,3.142741 +2025-09-26T20:55:02.978190,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,10,8,8,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run)|ThreadPoolExecutor-1_0|ThreadPoolExecutor-1_1|ThreadPoolExecutor-1_2,,0.510929 +2025-09-26T20:55:31.831699,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,8,6,6,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop)|Thread-8 (_loop),,3.146847 +2025-09-26T20:55:48.039892,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,9,7,7,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop),,3.62106 +2025-09-26T20:56:00.091258,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,9,7,7,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 
(_loop)|Thread-6 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop),,3.85161 +2025-09-26T20:56:36.039430,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,8,6,6,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop),,0.373017 +2025-09-26T20:56:46.177513,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,8,6,6,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop),,0.441456 +2025-09-26T20:57:04.819808,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,9,7,7,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop),,3.831966 +2025-09-26T20:57:26.420865,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,9,7,7,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop),,3.432511 +2025-09-26T20:57:41.632088,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,9,7,7,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop),,3.572344 +2025-09-26T20:59:44.833182,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,8,6,6,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop)|Thread-8 (_loop),,2.957557 +2025-09-26T21:00:00.460394,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,9,7,7,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop),,3.531353 +2025-09-26T21:07:19.402805,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,2,0,0,,,0.286919 +2025-09-26T21:07:33.193310,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,2,0,0,,,0.298396 
+2025-09-26T21:08:00.866770,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,9,7,7,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop),,3.854635 +2025-09-26T21:08:28.034051,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,9,7,7,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop),,3.974273 +2025-09-26T21:08:44.551138,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,2,0,0,,,0.273476 +2025-09-26T21:08:55.657202,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,2,0,0,,,0.245609 +2025-09-26T21:09:00.953456,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,9,7,7,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-5 (_loop)|Thread-6 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop),,3.266123 +2025-09-26T21:10:50.563809,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,13,11,11,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop)|Thread-13 (_loop),,3.642472 +2025-09-26T21:11:01.662644,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,13,11,11,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop)|Thread-13 (_loop),,3.256002 +2025-09-26T21:11:13.164928,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,13,11,11,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-7 (_loop)|Thread-8 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop)|Thread-13 (_loop),,3.579078 
+2025-09-26T21:15:24.550523,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,16,14,14,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop)|Thread-13 (_loop)|Thread-14 (_loop)|Thread-15 (_loop)|Thread-16 (_loop),,4.169912 +2025-09-26T21:15:36.587785,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,16,14,14,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop)|Thread-13 (_loop)|Thread-14 (_loop)|Thread-15 (_loop)|Thread-16 (_loop),,4.260964 +2025-09-26T21:15:56.563667,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,16,14,14,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop)|Thread-13 (_loop)|Thread-14 (_loop)|Thread-15 (_loop)|Thread-16 (_loop),,3.329131 +2025-09-26T21:16:21.461189,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,16,14,14,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop)|Thread-13 (_loop)|Thread-14 (_loop)|Thread-15 (_loop)|Thread-16 (_loop),,4.616649 +2025-09-26T21:18:32.676323,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,12,10,10,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop),,4.093313 +2025-09-26T21:19:02.769054,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,20,18,18,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 
(run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop)|Thread-13 (_loop)|Thread-14 (_loop)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-19 (_loop)|Thread-20 (_loop),,5.876751 +2025-09-26T21:19:28.249666,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,20,18,18,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop)|Thread-13 (_loop)|Thread-14 (_loop)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-19 (_loop)|Thread-20 (_loop),,13.500141 +2025-09-26T21:23:49.180148,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,18,16,16,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop)|Thread-13 (_loop)|Thread-14 (_loop)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop),,6.745949 +2025-09-26T21:24:19.318338,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,20,18,18,Thread-2 (run_forever)|Thread-3 (_loop)|Thread-4 (run_forever)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-9 (_loop)|Thread-10 (_loop)|Thread-11 (_loop)|Thread-12 (_loop)|Thread-13 (_loop)|Thread-14 (_loop)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-19 (_loop)|Thread-20 (_loop),,7.394865 +2025-09-26T21:56:41.037344,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,22,20,20,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess 
Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop),,7.848069 +2025-09-26T21:57:13.088641,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,22,20,20,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop),,7.019152 +2025-09-26T21:57:24.501563,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker 
process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,7.54012 +2025-09-26T21:59:04.886678,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,37.622692 +2025-09-26T21:59:51.750976,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process 
join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,15.569444 +2025-09-26T22:00:12.129145,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,31.895698 +2025-09-26T22:00:48.421398,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from 
Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,37.559155 +2025-09-26T22:02:28.714625,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,7.533279 +2025-09-26T22:02:41.314814,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 
(_loop)|Thread-8 (_loop),,37.330718 +2025-09-26T22:03:25.387171,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,18.811334 +2025-09-26T22:03:50.157214,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,23.603525 
+2025-09-26T22:04:18.653888,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,21.43122 +2025-09-26T22:05:06.693792,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,40.663185 +2025-09-26T22:06:10.956855,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO 
loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,44.398701 +2025-09-26T22:07:15.649908,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,36.904216 +2025-09-26T22:08:00.336034,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess 
Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,26.294427 +2025-09-26T22:08:32.729168,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,35.683161 +2025-09-26T22:09:13.933173,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message 
queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,23.668677 +2025-09-26T22:11:00.968767,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,36.469964 +2025-09-26T22:11:42.617464,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from 
Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,47.092509 +2025-09-26T22:13:37.233940,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,35.421827 +2025-09-26T22:14:17.572317,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask 
Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,46.901453 +2025-09-26T22:17:32.109956,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,30.179645 +2025-09-26T22:18:10.839922,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message 
queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,47.087939 +2025-09-26T22:19:13.499558,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,46.740673 +2025-09-26T22:20:22.388947,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process 
join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,46.711166 +2025-09-26T22:21:18.618912,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,23.319262 +2025-09-26T22:21:47.408626,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,47.887326 
+2025-09-26T22:23:25.192098,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,27.567826 +2025-09-26T22:29:20.655400,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,22,20,20,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop),,7.834528 +2025-09-26T22:29:38.676467,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask 
Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,38.416378 +2025-09-26T22:30:22.173751,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop)|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join,,36.314138 +2025-09-26T22:31:03.357228,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process 
join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,13.579422 +2025-09-26T22:31:21.557210,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,41.263518 +2025-09-26T22:32:07.793031,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from 
Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,26.183927 +2025-09-26T22:32:39.180680,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,32.074383 +2025-09-26T22:33:17.385932,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask 
Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,43.263926 +2025-09-26T22:34:14.699754,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,47.492652 +2025-09-26T22:43:19.368309,dimos.perception.detection2d.test_moduleDB,test_moduleDB,2,23,21,21,IO loop|Profile|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch message 
queue|QueueFeederThread|AsyncProcess Dask Worker process (from Nanny) watch message queue|AsyncProcess Dask Worker process (from Nanny) watch process join|AsyncProcess Dask Worker process (from Nanny) watch process join|Dask-Offload_0|Thread-4 (_loop)|Thread-6 (_loop)|Thread-8 (_loop),,47.552228 +2025-09-27T17:57:06.427830,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,8,6,6,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop),,0.289448 +2025-09-27T17:57:30.116141,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,16,14,14,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop),,7.382514 +2025-09-27T17:58:27.591734,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,8,6,6,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop),,0.166295 +2025-09-27T17:58:44.098746,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,16,14,14,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop),,7.179522 +2025-09-27T17:59:00.368078,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,16,14,14,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop),,6.170713 
+2025-09-27T18:00:04.318368,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,19,17,17,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop),,5.413316 +2025-09-27T18:00:19.199622,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,19,17,17,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop),,5.367109 +2025-09-27T18:00:34.799745,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,20,18,18,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop),,3.467772 +2025-09-27T18:00:44.405039,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,20,18,18,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop),,3.48599 +2025-09-27T18:01:55.863290,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,20,18,18,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 
(_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop),,4.220308 +2025-09-27T18:06:51.465114,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,19,17,17,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop),,3.501642 +2025-09-27T18:07:23.741311,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,20,18,18,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop),,3.82077 +2025-09-27T18:07:33.542755,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,20,18,18,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop),,4.020235 +2025-09-27T18:07:56.329029,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,23,21,21,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 
(run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop),,3.576301 +2025-09-27T18:08:07.839917,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,23,21,21,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop),,3.986368 +2025-09-27T18:14:31.454478,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,23,21,21,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop),,4.208516 +2025-09-27T18:15:01.658331,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,23,21,21,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop),,3.598878 +2025-09-27T18:15:20.787995,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,23,21,21,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 
(run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop),,5.039514 +2025-09-27T18:15:49.414461,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,23,21,21,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop),,3.896179 +2025-09-27T18:16:05.766640,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,23,21,21,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop),,3.871845 +2025-09-27T18:16:24.389201,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,23,21,21,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop),,4.086401 +2025-09-27T18:16:36.047673,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,23,21,21,Thread-2 (_loop)|Thread-3 
(_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop),,3.996469 +2025-09-27T18:17:53.485618,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,23,21,21,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop),,5.661548 +2025-09-27T18:18:23.112511,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,25,23,23,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop)|Thread-24 (_loop)|Thread-25 (_loop),,4.5843 +2025-09-27T18:18:52.349188,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,25,23,23,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop)|Thread-24 (_loop)|Thread-25 
(_loop),,4.560781 +2025-09-27T18:19:24.261027,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,27,25,25,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop)|Thread-24 (_loop)|Thread-25 (_loop)|Thread-26 (_loop)|Thread-27 (_loop),,5.814174 +2025-09-27T18:19:41.179952,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,27,25,25,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop)|Thread-24 (_loop)|Thread-25 (_loop)|Thread-26 (_loop)|Thread-27 (_loop),,5.856868 +2025-09-27T18:20:08.302972,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,27,25,25,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop)|Thread-24 (_loop)|Thread-25 (_loop)|Thread-26 (_loop)|Thread-27 (_loop),,6.686924 +2025-09-27T20:20:23.092088,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,27,25,25,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 
(_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop)|Thread-24 (_loop)|Thread-25 (_loop)|Thread-26 (_loop)|Thread-27 (_loop),,6.41635 +2025-09-27T20:20:50.518887,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,27,25,25,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop)|Thread-24 (_loop)|Thread-25 (_loop)|Thread-26 (_loop)|Thread-27 (_loop),,6.522804 +2025-09-27T20:21:13.213561,dimos.perception.detection2d.test_module2d,test_module2d_visual_query,2,27,25,25,Thread-2 (_loop)|Thread-3 (_loop)|Thread-4 (_loop)|Thread-5 (_loop)|Thread-6 (run_forever)|Thread-7 (_loop)|Thread-8 (run_forever)|Thread-9 (_loop)|Thread-10 (run_forever)|Thread-11 (_loop)|Thread-12 (run_forever)|Thread-13 (_loop)|Thread-14 (run_forever)|Thread-15 (_loop)|Thread-16 (_loop)|Thread-17 (_loop)|Thread-18 (_loop)|Thread-20 (_loop)|Thread-21 (_loop)|Thread-22 (_loop)|Thread-23 (_loop)|Thread-24 (_loop)|Thread-25 (_loop)|Thread-26 (_loop)|Thread-27 (_loop),,7.391736 diff --git a/dimos/perception/detection2d/type/__init__.py b/dimos/perception/detection2d/type/__init__.py new file mode 100644 index 0000000000..fb7c435c0c --- /dev/null +++ b/dimos/perception/detection2d/type/__init__.py @@ -0,0 +1,7 @@ +from dimos.perception.detection2d.type.detection2d import ( + Detection2D, + ImageDetections2D, + InconvinientDetectionFormat, +) +from dimos.perception.detection2d.type.detection3d import 
Detection3D, ImageDetections3D +from dimos.perception.detection2d.type.imageDetections import ImageDetections, TableStr diff --git a/dimos/perception/detection2d/type.py b/dimos/perception/detection2d/type/detection2d.py similarity index 61% rename from dimos/perception/detection2d/type.py rename to dimos/perception/detection2d/type/detection2d.py index b37828e8e8..5bbedfb6ae 100644 --- a/dimos/perception/detection2d/type.py +++ b/dimos/perception/detection2d/type/detection2d.py @@ -14,13 +14,10 @@ from __future__ import annotations -import functools import hashlib from dataclasses import dataclass -from typing import Any, Dict, Generic, List, Optional, Tuple, TypeVar +from typing import Any, Dict, List, Tuple -import numpy as np -from dimos_lcm.foxglove_msgs.Color import Color from dimos_lcm.foxglove_msgs.ImageAnnotations import ( PointsAnnotation, TextAnnotation, @@ -37,14 +34,13 @@ Detection2D as ROSDetection2D, ) from rich.console import Console -from rich.table import Table from rich.text import Text from dimos.msgs.foxglove_msgs import ImageAnnotations -from dimos.msgs.geometry_msgs import PoseStamped, Transform, Vector3 -from dimos.msgs.sensor_msgs import Image, PointCloud2 +from dimos.msgs.foxglove_msgs.Color import Color +from dimos.msgs.sensor_msgs import Image from dimos.msgs.std_msgs import Header -from dimos.msgs.vision_msgs import Detection2DArray +from dimos.perception.detection2d.type.imageDetections import ImageDetections from dimos.types.timestamped import Timestamped, to_ros_stamp, to_timestamp Bbox = Tuple[float, float, float, float] @@ -56,6 +52,32 @@ Detections = List[Detection] +def _hash_to_color(name: str) -> str: + """Generate a consistent color for a given name using hash.""" + # List of rich colors to choose from + colors = [ + "cyan", + "magenta", + "yellow", + "blue", + "green", + "red", + "bright_cyan", + "bright_magenta", + "bright_yellow", + "bright_blue", + "bright_green", + "bright_red", + "purple", + "white", + "pink", + ] + + # 
Hash the name and pick a color + hash_value = hashlib.md5(name.encode()).digest()[0] + return colors[hash_value % len(colors)] + + # yolo and detic have bad formats this translates into list of detections def better_detection_format(inconvinient_detections: InconvinientDetectionFormat) -> Detections: bboxes, track_ids, class_ids, confidences, names = inconvinient_detections @@ -84,13 +106,10 @@ def to_repr_dict(self) -> Dict[str, Any]: "name": self.name, "class": str(self.class_id), "track": str(self.track_id), - "conf": self.confidence, + "conf": f"{self.confidence:.2f}", "bbox": f"[{x1:.0f},{y1:.0f},{x2:.0f},{y2:.0f}]", } - def to_image(self) -> Image: - return self.image - # return focused image, only on the bbox def cropped_image(self, padding: int = 20) -> Image: """Return a cropped version of the image focused on the bounding box. @@ -110,26 +129,18 @@ def __str__(self): console = Console(force_terminal=True, legacy_windows=False) d = self.to_repr_dict() - # Create confidence text with color based on value - conf_color = "green" if d["conf"] > 0.8 else "yellow" if d["conf"] > 0.5 else "red" - conf_text = Text(f"{d['conf']:.1%}", style=conf_color) - # Build the string representation parts = [ Text(f"{self.__class__.__name__}("), - Text(d["name"], style="bold cyan"), - Text(f" cls={d['class']} trk={d['track']} "), - conf_text, - Text(f" {d['bbox']}"), ] # Add any extra fields (e.g., points for Detection3D) - extra_keys = [k for k in d.keys() if k not in ["name", "class", "track", "conf", "bbox"]] + extra_keys = [k for k in d.keys() if k not in ["class"]] for key in extra_keys: if d[key] == "None": - parts.append(Text(f" {key}={d[key]}", style="dim")) + parts.append(Text(f"{key}={d[key]}", style="dim")) else: - parts.append(Text(f" {key}={d[key]}", style="blue")) + parts.append(Text(f"{key}={d[key]}", style=_hash_to_color(key))) parts.append(Text(")")) @@ -138,6 +149,12 @@ def __str__(self): console.print(*parts, end="") return capture.get().strip() + def 
bbox_2d_volume(self) -> float: + x1, y1, x2, y2 = self.bbox + width = max(0.0, x2 - x1) + height = max(0.0, y2 - y1) + return width * height + @classmethod def from_detector( cls, raw_detections: InconvinientDetectionFormat, **kwargs @@ -179,7 +196,7 @@ def to_ros_bbox(self) -> BoundingBox2D: ) def lcm_encode(self): - return self.to_imageannotations().lcm_encode() + return self.to_image_annotations().lcm_encode() def to_text_annotation(self) -> List[TextAnnotation]: x1, y1, x2, y2 = self.bbox @@ -198,7 +215,7 @@ def to_text_annotation(self) -> List[TextAnnotation]: TextAnnotation( timestamp=to_ros_stamp(self.ts), position=Point2(x=x1, y=y1), - text=f"{self.name}_{self.class_id}_{self.track_id}", + text=f"{self.name}_{self.track_id}", font_size=font_size, text_color=Color(r=1.0, g=1.0, b=1.0, a=1), background_color=Color(r=0, g=0, b=0, a=1), @@ -214,7 +231,7 @@ def to_points_annotation(self) -> List[PointsAnnotation]: PointsAnnotation( timestamp=to_ros_stamp(self.ts), outline_color=Color(r=0.0, g=0.0, b=0.0, a=1.0), - fill_color=Color(r=1.0, g=0.0, b=0.0, a=0.15), + fill_color=Color.from_string(self.name, alpha=0.15), thickness=thickness, points_length=4, points=[ @@ -230,7 +247,7 @@ def to_points_annotation(self) -> List[PointsAnnotation]: # this is almost never called directly since this is a single detection # and ImageAnnotations message normally contains multiple detections annotations # so ImageDetections2D and ImageDetections3D normally implements this for whole image - def to_annotations(self) -> ImageAnnotations: + def to_image_annotations(self) -> ImageAnnotations: points = self.to_points_annotation() texts = self.to_text_annotation() @@ -300,179 +317,6 @@ def to_ros_detection2d(self) -> ROSDetection2D: id=str(self.track_id), ) - def to_3d(self, **kwargs) -> "Detection3D": - return Detection3D( - image=self.image, - bbox=self.bbox, - track_id=self.track_id, - class_id=self.class_id, - confidence=self.confidence, - name=self.name, - ts=self.ts, - **kwargs, 
- ) - - -@dataclass -class Detection3D(Detection2D): - pointcloud: PointCloud2 - transform: Transform - - def localize(self, pointcloud: PointCloud2) -> Detection3D: - self.pointcloud = pointcloud - return self - - @functools.cached_property - def center(self) -> Vector3: - """Calculate the center of the pointcloud in world frame.""" - points = np.asarray(self.pointcloud.pointcloud.points) - center = points.mean(axis=0) - return Vector3(*center) - - @functools.cached_property - def pose(self) -> PoseStamped: - """Convert detection to a PoseStamped using pointcloud center. - - Returns pose in world frame with identity rotation. - The pointcloud is already in world frame. - """ - return PoseStamped( - ts=self.ts, - frame_id="world", - position=self.center, - orientation=(0.0, 0.0, 0.0, 1.0), # Identity quaternion - ) - - def to_repr_dict(self) -> Dict[str, Any]: - d = super().to_repr_dict() - - # Add pointcloud info - d["points"] = str(len(self.pointcloud)) - - # Calculate distance from camera - # The pointcloud is in world frame, and transform gives camera position in world - center_world = self.center - # Camera position in world frame is the translation part of the transform - camera_pos = self.transform.translation - # Use Vector3 subtraction and magnitude - distance = (center_world - camera_pos).magnitude() - d["dist"] = f"{distance:.2f}m" - - return d - - -T = TypeVar("T", bound="Detection2D") - - -def _hash_to_color(name: str) -> str: - """Generate a consistent color for a given name using hash.""" - # List of rich colors to choose from - colors = [ - "cyan", - "magenta", - "yellow", - "blue", - "green", - "red", - "bright_cyan", - "bright_magenta", - "bright_yellow", - "bright_blue", - "bright_green", - "bright_red", - "purple", - "white", - "pink", - ] - - # Hash the name and pick a color - hash_value = hashlib.md5(name.encode()).digest()[0] - return colors[hash_value % len(colors)] - - -class ImageDetections(Generic[T]): - image: Image - detections: List[T] 
- - def __init__(self, image: Image, detections: List[T]): - self.image = image - self.detections = detections - for det in self.detections: - if not det.ts: - det.ts = image.ts - - def __str__(self): - console = Console(force_terminal=True, legacy_windows=False) - - # Dynamically build columns based on the first detection's dict keys - if not self.detections: - return "Empty ImageDetections" - - # Create a table for detections - table = Table( - title=f"{self.__class__.__name__} [{len(self.detections)} detections @ {to_timestamp(self.image.ts):.3f}]", - show_header=True, - show_edge=True, - ) - - # Cache all repr_dicts to avoid double computation - detection_dicts = [det.to_repr_dict() for det in self.detections] - - first_dict = detection_dicts[0] - table.add_column("#", style="dim") - for col in first_dict.keys(): - color = _hash_to_color(col) - table.add_column(col.title(), style=color) - - # Add each detection to the table - for i, d in enumerate(detection_dicts): - row = [str(i)] - - for key in first_dict.keys(): - if key == "conf": - # Color-code confidence - conf_color = "green" if d[key] > 0.8 else "yellow" if d[key] > 0.5 else "red" - row.append(Text(f"{d[key]:.1%}", style=conf_color)) - elif key == "points" and d.get(key) == "None": - row.append(Text(d.get(key, ""), style="dim")) - else: - row.append(str(d.get(key, ""))) - table.add_row(*row) - - with console.capture() as capture: - console.print(table) - return capture.get().strip() - - def __len__(self): - return len(self.detections) - - def __iter__(self): - return iter(self.detections) - - def __getitem__(self, index): - return self.detections[index] - - def to_ros_detection2d_array(self) -> Detection2DArray: - return Detection2DArray( - detections_length=len(self.detections), - header=Header(self.image.ts, "camera_optical"), - detections=[det.to_ros_detection2d() for det in self.detections], - ) - - def to_image_annotations(self) -> ImageAnnotations: - def flatten(xss): - return [x for xs in xss for 
x in xs] - - texts = flatten(det.to_text_annotation() for det in self.detections) - points = flatten(det.to_points_annotation() for det in self.detections) - - return ImageAnnotations( - texts=texts, - texts_length=len(texts), - points=points, - points_length=len(points), - ) - class ImageDetections2D(ImageDetections[Detection2D]): @classmethod @@ -483,9 +327,3 @@ def from_detector( image=image, detections=Detection2D.from_detector(raw_detections, image=image, ts=image.ts), ) - - -class ImageDetections3D(ImageDetections[Detection3D]): - """Specialized class for 3D detections in an image.""" - - ... diff --git a/dimos/perception/detection2d/type/detection3d.pkl b/dimos/perception/detection2d/type/detection3d.pkl new file mode 100644 index 0000000000..b42ccb31e4 Binary files /dev/null and b/dimos/perception/detection2d/type/detection3d.pkl differ diff --git a/dimos/perception/detection2d/type/detection3d.py b/dimos/perception/detection2d/type/detection3d.py new file mode 100644 index 0000000000..e03c9aeb75 --- /dev/null +++ b/dimos/perception/detection2d/type/detection3d.py @@ -0,0 +1,403 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import functools +from dataclasses import dataclass +from typing import Any, Callable, Dict, Optional, TypeVar + +import numpy as np +from dimos_lcm.sensor_msgs import CameraInfo +from lcm_msgs.builtin_interfaces import Duration +from lcm_msgs.foxglove_msgs import CubePrimitive, SceneEntity, SceneUpdate, TextPrimitive +from lcm_msgs.geometry_msgs import Point, Pose, Quaternion +from lcm_msgs.geometry_msgs import Vector3 as LCMVector3 + +from dimos.msgs.foxglove_msgs.Color import Color +from dimos.msgs.geometry_msgs import PoseStamped, Transform, Vector3 +from dimos.msgs.sensor_msgs import PointCloud2 +from dimos.perception.detection2d.type.detection2d import Detection2D +from dimos.perception.detection2d.type.imageDetections import ImageDetections +from dimos.types.timestamped import to_ros_stamp + +type Detection3DFilter = Callable[ + [Detection2D, PointCloud2, CameraInfo, Transform], Optional[Detection3D] +] + + +def height_filter(height=0.1) -> Detection3DFilter: + return lambda det, pc, ci, tf: pc.filter_by_height(height) + + +def statistical(nb_neighbors=40, std_ratio=0.5) -> Detection3DFilter: + def filter_func( + det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform + ) -> Optional[PointCloud2]: + try: + statistical, removed = pc.pointcloud.remove_statistical_outlier( + nb_neighbors=nb_neighbors, std_ratio=std_ratio + ) + return PointCloud2(statistical, pc.frame_id, pc.ts) + except Exception as e: + # print("statistical filter failed:", e) + return None + + return filter_func + + +def raycast() -> Detection3DFilter: + def filter_func( + det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform + ) -> Optional[PointCloud2]: + try: + camera_pos = tf.inverse().translation + camera_pos_np = camera_pos.to_numpy() + _, visible_indices = pc.pointcloud.hidden_point_removal(camera_pos_np, radius=100.0) + visible_pcd = pc.pointcloud.select_by_index(visible_indices) + return PointCloud2(visible_pcd, 
pc.frame_id, pc.ts) + except Exception as e: + # print("raycast filter failed:", e) + return None + + return filter_func + + +def radius_outlier(min_neighbors: int = 20, radius: float = 0.3) -> Detection3DFilter: + """ + Remove isolated points: keep only points that have at least `min_neighbors` + neighbors within `radius` meters (same units as your point cloud). + """ + + def filter_func( + det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform + ) -> Optional[PointCloud2]: + filtered_pcd, removed = pc.pointcloud.remove_radius_outlier( + nb_points=min_neighbors, radius=radius + ) + return PointCloud2(filtered_pcd, pc.frame_id, pc.ts) + + return filter_func + + +@dataclass +class Detection3D(Detection2D): + pointcloud: PointCloud2 + transform: Transform + frame_id: str = "unknown" + + @classmethod + def from_2d( + cls, + det: Detection2D, + world_pointcloud: PointCloud2, + camera_info: CameraInfo, + world_to_optical_transform: Transform, + # filters are to be adjusted based on the sensor noise characteristics if feeding + # sensor data directly + filters: list[Callable[[PointCloud2], PointCloud2]] = [ + # height_filter(0.1), + raycast(), + radius_outlier(), + statistical(), + ], + ) -> Optional["Detection3D"]: + """Create a Detection3D from a 2D detection by projecting world pointcloud. + + This method handles: + 1. Projecting world pointcloud to camera frame + 2. Filtering points within the 2D detection bounding box + 3. Cleaning up the pointcloud (height filter, outlier removal) + 4. 
Hidden point removal from camera perspective + + Args: + det: The 2D detection + world_pointcloud: Full pointcloud in world frame + camera_info: Camera calibration info + world_to_camerlka_transform: Transform from world to camera frame + filters: List of functions to apply to the pointcloud for filtering + Returns: + Detection3D with filtered pointcloud, or None if no valid points + """ + # Extract camera parameters + fx, fy = camera_info.K[0], camera_info.K[4] + cx, cy = camera_info.K[2], camera_info.K[5] + image_width = camera_info.width + image_height = camera_info.height + + camera_matrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) + + # Convert pointcloud to numpy array + world_points = world_pointcloud.as_numpy() + + # Project points to camera frame + points_homogeneous = np.hstack([world_points, np.ones((world_points.shape[0], 1))]) + extrinsics_matrix = world_to_optical_transform.to_matrix() + points_camera = (extrinsics_matrix @ points_homogeneous.T).T + + # Filter out points behind the camera + valid_mask = points_camera[:, 2] > 0 + points_camera = points_camera[valid_mask] + world_points = world_points[valid_mask] + + if len(world_points) == 0: + return None + + # Project to 2D + points_2d_homogeneous = (camera_matrix @ points_camera[:, :3].T).T + points_2d = points_2d_homogeneous[:, :2] / points_2d_homogeneous[:, 2:3] + + # Filter points within image bounds + in_image_mask = ( + (points_2d[:, 0] >= 0) + & (points_2d[:, 0] < image_width) + & (points_2d[:, 1] >= 0) + & (points_2d[:, 1] < image_height) + ) + points_2d = points_2d[in_image_mask] + world_points = world_points[in_image_mask] + + if len(world_points) == 0: + return None + + # Extract bbox from Detection2D + x_min, y_min, x_max, y_max = det.bbox + + # Find points within this detection box (with small margin) + margin = 5 # pixels + in_box_mask = ( + (points_2d[:, 0] >= x_min - margin) + & (points_2d[:, 0] <= x_max + margin) + & (points_2d[:, 1] >= y_min - margin) + & (points_2d[:, 1] <= 
y_max + margin) + ) + + detection_points = world_points[in_box_mask] + + if detection_points.shape[0] == 0: + # print(f"No points found in detection bbox after projection. {det.name}") + return None + + # Create initial pointcloud for this detection + initial_pc = PointCloud2.from_numpy( + detection_points, + frame_id=world_pointcloud.frame_id, + timestamp=world_pointcloud.ts, + ) + + # Apply filters - each filter needs all 4 arguments + detection_pc = initial_pc + for filter_func in filters: + result = filter_func(det, detection_pc, camera_info, world_to_optical_transform) + if result is None: + return None + detection_pc = result + + # Final check for empty pointcloud + if len(detection_pc.pointcloud.points) == 0: + return None + + # Create Detection3D with filtered pointcloud + return Detection3D( + image=det.image, + bbox=det.bbox, + track_id=det.track_id, + class_id=det.class_id, + confidence=det.confidence, + name=det.name, + ts=det.ts, + pointcloud=detection_pc, + transform=world_to_optical_transform, + frame_id=world_pointcloud.frame_id, + ) + + @functools.cached_property + def center(self) -> Vector3: + return Vector3(*self.pointcloud.center) + + @functools.cached_property + def pose(self) -> PoseStamped: + """Convert detection to a PoseStamped using pointcloud center. + + Returns pose in world frame with identity rotation. + The pointcloud is already in world frame. 
+ """ + return PoseStamped( + ts=self.ts, + frame_id=self.frame_id, + position=self.center, + orientation=(0.0, 0.0, 0.0, 1.0), # Identity quaternion + ) + + def get_bounding_box(self): + """Get axis-aligned bounding box of the detection's pointcloud.""" + return self.pointcloud.get_axis_aligned_bounding_box() + + def get_oriented_bounding_box(self): + """Get oriented bounding box of the detection's pointcloud.""" + return self.pointcloud.get_oriented_bounding_box() + + def get_bounding_box_dimensions(self) -> tuple[float, float, float]: + """Get dimensions (width, height, depth) of the detection's bounding box.""" + return self.pointcloud.get_bounding_box_dimensions() + + def bounding_box_intersects(self, other: "Detection3D") -> bool: + """Check if this detection's bounding box intersects with another's.""" + return self.pointcloud.bounding_box_intersects(other.pointcloud) + + def to_repr_dict(self) -> Dict[str, Any]: + # Calculate distance from camera + # The pointcloud is in world frame, and transform gives camera position in world + center_world = self.center + # Camera position in world frame is the translation part of the transform + camera_pos = self.transform.translation + # Use Vector3 subtraction and magnitude + distance = (center_world - camera_pos).magnitude() + + parent_dict = super().to_repr_dict() + # Remove bbox key if present + parent_dict.pop("bbox", None) + + return { + **parent_dict, + "dist": f"{distance:.2f}m", + "points": str(len(self.pointcloud)), + } + + def to_foxglove_scene_entity(self, entity_id: str = None) -> "SceneEntity": + """Convert detection to a Foxglove SceneEntity with cube primitive and text label. + + Args: + entity_id: Optional custom entity ID. If None, generates one from name and hash. 
+ + Returns: + SceneEntity with cube bounding box and text label + """ + + # Create a cube primitive for the bounding box + cube = CubePrimitive() + + # Get the axis-aligned bounding box + aabb = self.get_bounding_box() + + # Set pose from axis-aligned bounding box + cube.pose = Pose() + cube.pose.position = Point() + # Get center of the axis-aligned bounding box + aabb_center = aabb.get_center() + cube.pose.position.x = aabb_center[0] + cube.pose.position.y = aabb_center[1] + cube.pose.position.z = aabb_center[2] + + # For axis-aligned box, use identity quaternion (no rotation) + cube.pose.orientation = Quaternion() + cube.pose.orientation.x = 0 + cube.pose.orientation.y = 0 + cube.pose.orientation.z = 0 + cube.pose.orientation.w = 1 + + # Set size from axis-aligned bounding box + cube.size = LCMVector3() + aabb_extent = aabb.get_extent() + cube.size.x = aabb_extent[0] # width + cube.size.y = aabb_extent[1] # height + cube.size.z = aabb_extent[2] # depth + + # Set color based on name hash + cube.color = Color.from_string(self.name, alpha=0.2) + + # Create text label + text = TextPrimitive() + text.pose = Pose() + text.pose.position = Point() + text.pose.position.x = aabb_center[0] + text.pose.position.y = aabb_center[1] + text.pose.position.z = aabb_center[2] + aabb_extent[2] / 2 + 0.1 # Above the box + text.pose.orientation = Quaternion() + text.pose.orientation.x = 0 + text.pose.orientation.y = 0 + text.pose.orientation.z = 0 + text.pose.orientation.w = 1 + text.billboard = True + text.font_size = 20.0 + text.scale_invariant = True + text.color = Color() + text.color.r = 1.0 + text.color.g = 1.0 + text.color.b = 1.0 + text.color.a = 1.0 + text.text = self.scene_entity_label() + + # Create scene entity + entity = SceneEntity() + entity.timestamp = to_ros_stamp(self.ts) + entity.frame_id = self.frame_id + entity.id = str(self.track_id) + entity.lifetime = Duration() + entity.lifetime.sec = 0 # Persistent + entity.lifetime.nanosec = 0 + entity.frame_locked = False 
+ + # Initialize all primitive arrays + entity.metadata_length = 0 + entity.metadata = [] + entity.arrows_length = 0 + entity.arrows = [] + entity.cubes_length = 1 + entity.cubes = [cube] + entity.spheres_length = 0 + entity.spheres = [] + entity.cylinders_length = 0 + entity.cylinders = [] + entity.lines_length = 0 + entity.lines = [] + entity.triangles_length = 0 + entity.triangles = [] + entity.texts_length = 1 + entity.texts = [text] + entity.models_length = 0 + entity.models = [] + + return entity + + def scene_entity_label(self) -> str: + return f"{self.track_id}/{self.name} ({self.confidence:.0%})" + + +T = TypeVar("T", bound="Detection2D") + + +class ImageDetections3D(ImageDetections[Detection3D]): + """Specialized class for 3D detections in an image.""" + + def to_foxglove_scene_update(self) -> "SceneUpdate": + """Convert all detections to a Foxglove SceneUpdate message. + + Returns: + SceneUpdate containing SceneEntity objects for all detections + """ + + # Create SceneUpdate message with all detections + scene_update = SceneUpdate() + scene_update.deletions_length = 0 + scene_update.deletions = [] + scene_update.entities = [] + + # Process each detection + for i, detection in enumerate(self.detections): + entity = detection.to_foxglove_scene_entity(entity_id=f"detection_{detection.name}_{i}") + scene_update.entities.append(entity) + + scene_update.entities_length = len(scene_update.entities) + return scene_update diff --git a/dimos/perception/detection2d/type/imageDetections.py b/dimos/perception/detection2d/type/imageDetections.py new file mode 100644 index 0000000000..6b6f03d4c1 --- /dev/null +++ b/dimos/perception/detection2d/type/imageDetections.py @@ -0,0 +1,157 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import hashlib +from typing import TYPE_CHECKING, Any, Dict, Generic, List, Optional, Tuple, TypeVar + +from rich.console import Console +from rich.table import Table +from rich.text import Text + +from dimos.msgs.foxglove_msgs import ImageAnnotations +from dimos.msgs.sensor_msgs import Image +from dimos.msgs.std_msgs import Header +from dimos.msgs.vision_msgs import Detection2DArray +from dimos.types.timestamped import to_timestamp + +if TYPE_CHECKING: + from dimos.perception.detection2d.type.detection2d import Detection2D + +T = TypeVar("T", bound="Detection2D") + + +def _hash_to_color(name: str) -> str: + """Generate a consistent color for a given name using hash.""" + # List of rich colors to choose from + colors = [ + "cyan", + "magenta", + "yellow", + "blue", + "green", + "red", + "bright_cyan", + "bright_magenta", + "bright_yellow", + "bright_blue", + "bright_green", + "bright_red", + "purple", + "white", + "pink", + ] + + # Hash the name and pick a color + hash_value = hashlib.md5(name.encode()).digest()[0] + return colors[hash_value % len(colors)] + + +class TableStr: + def __str__(self): + console = Console(force_terminal=True, legacy_windows=False) + + # Create a table for detections + table = Table( + title=f"{self.__class__.__name__} [{len(self.detections)} detections @ {to_timestamp(self.image.ts):.3f}]", + show_header=True, + show_edge=True, + ) + + # Dynamically build columns based on the first detection's dict keys + if not self.detections: + return ( + f" {self.__class__.__name__} [0 detections @ 
{to_timestamp(self.image.ts):.3f}]" + ) + + # Cache all repr_dicts to avoid double computation + detection_dicts = [det.to_repr_dict() for det in self] + + first_dict = detection_dicts[0] + table.add_column("#", style="dim") + for col in first_dict.keys(): + color = _hash_to_color(col) + table.add_column(col.title(), style=color) + + # Add each detection to the table + for i, d in enumerate(detection_dicts): + row = [str(i)] + + for key in first_dict.keys(): + if key == "conf": + # Color-code confidence + conf_color = ( + "green" + if float(d[key]) > 0.8 + else "yellow" + if float(d[key]) > 0.5 + else "red" + ) + row.append(Text(f"{d[key]}", style=conf_color)) + elif key == "points" and d.get(key) == "None": + row.append(Text(d.get(key, ""), style="dim")) + else: + row.append(str(d.get(key, ""))) + table.add_row(*row) + + with console.capture() as capture: + console.print(table) + return capture.get().strip() + + +class ImageDetections(Generic[T], TableStr): + image: Image + detections: List[T] + + @property + def ts(self) -> float: + return self.image.ts + + def __init__(self, image: Image, detections: Optional(List[T]) = None): + self.image = image + self.detections = detections or [] + for det in self.detections: + if not det.ts: + det.ts = image.ts + + def __len__(self): + return len(self.detections) + + def __iter__(self): + return iter(self.detections) + + def __getitem__(self, index): + return self.detections[index] + + def to_ros_detection2d_array(self) -> Detection2DArray: + return Detection2DArray( + detections_length=len(self.detections), + header=Header(self.image.ts, "camera_optical"), + detections=[det.to_ros_detection2d() for det in self.detections], + ) + + def to_foxglove_annotations(self) -> ImageAnnotations: + def flatten(xss): + return [x for xs in xss for x in xs] + + texts = flatten(det.to_text_annotation() for det in self.detections) + points = flatten(det.to_points_annotation() for det in self.detections) + + return ImageAnnotations( + 
texts=texts, + texts_length=len(texts), + points=points, + points_length=len(points), + ) diff --git a/dimos/perception/detection2d/type/test_detection2d.py b/dimos/perception/detection2d/type/test_detection2d.py new file mode 100644 index 0000000000..59f043556e --- /dev/null +++ b/dimos/perception/detection2d/type/test_detection2d.py @@ -0,0 +1,222 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest + +from dimos.perception.detection2d import testing + + +@pytest.fixture(scope="session") +def detection2d(): + """Fixture to load and provide a 2D detection instance for testing.""" + moment = testing.detections2d() + detections = moment["detections2d"] + assert detections, "No detections found in test data" + assert len(detections.detections) > 0, "No individual detections in ImageDetections2D" + return detections.detections[0] + + +@pytest.fixture(scope="session") +def image_detections2d(): + """Fixture to load the full ImageDetections2D object.""" + moment = testing.detections2d() + return moment["detections2d"] + + +def test_detection_basic_properties(detection2d): + """Test basic detection properties.""" + assert detection2d.track_id >= 0 + assert detection2d.class_id >= 0 + assert 0.0 <= detection2d.confidence <= 1.0 + assert detection2d.name is not None + assert detection2d.ts > 0 + + +def test_bounding_box_format(detection2d): + """Test bounding box format and validity.""" + bbox = 
detection2d.bbox + assert len(bbox) == 4, "Bounding box should have 4 values" + + x1, y1, x2, y2 = bbox + assert x2 > x1, "x2 should be greater than x1" + assert y2 > y1, "y2 should be greater than y1" + assert x1 >= 0, "x1 should be non-negative" + assert y1 >= 0, "y1 should be non-negative" + + +def test_bbox_2d_volume(detection2d): + """Test bounding box volume calculation.""" + volume = detection2d.bbox_2d_volume() + assert volume > 0, "Bounding box volume should be positive" + + # Calculate expected volume + x1, y1, x2, y2 = detection2d.bbox + expected_volume = (x2 - x1) * (y2 - y1) + assert volume == pytest.approx(expected_volume, abs=0.001) + + +def test_bbox_center_calculation(detection2d): + """Test bounding box center calculation.""" + center_bbox = detection2d.get_bbox_center() + assert len(center_bbox) == 4, "Center bbox should have 4 values" + + center_x, center_y, width, height = center_bbox + x1, y1, x2, y2 = detection2d.bbox + + # Verify center calculations + assert center_x == pytest.approx((x1 + x2) / 2.0, abs=0.001) + assert center_y == pytest.approx((y1 + y2) / 2.0, abs=0.001) + assert width == pytest.approx(x2 - x1, abs=0.001) + assert height == pytest.approx(y2 - y1, abs=0.001) + + +def test_cropped_image(detection2d): + """Test cropped image generation.""" + padding = 20 + cropped = detection2d.cropped_image(padding=padding) + + assert cropped is not None, "Cropped image should not be None" + + # The actual cropped image is (260, 192, 3) + assert cropped.width == 192 + assert cropped.height == 260 + assert cropped.shape == (260, 192, 3) + + +def test_to_ros_bbox(detection2d): + """Test ROS bounding box conversion.""" + ros_bbox = detection2d.to_ros_bbox() + + assert ros_bbox is not None + assert hasattr(ros_bbox, "center") + assert hasattr(ros_bbox, "size_x") + assert hasattr(ros_bbox, "size_y") + + # Verify values match + center_x, center_y, width, height = detection2d.get_bbox_center() + assert ros_bbox.center.position.x == 
pytest.approx(center_x, abs=0.001) + assert ros_bbox.center.position.y == pytest.approx(center_y, abs=0.001) + assert ros_bbox.size_x == pytest.approx(width, abs=0.001) + assert ros_bbox.size_y == pytest.approx(height, abs=0.001) + + +def test_to_text_annotation(detection2d): + """Test text annotation generation.""" + text_annotations = detection2d.to_text_annotation() + + # Actually returns 2 annotations + assert len(text_annotations) == 2, "Should have two text annotations" + + # First annotation: confidence + text0 = text_annotations[0] + assert text0.text == "confidence: 0.815" + assert text0.position.x == pytest.approx(503.437, abs=0.001) + assert text0.position.y == pytest.approx(489.829, abs=0.001) + + # Second annotation: name_class_track + text1 = text_annotations[1] + assert text1.text == "suitcase_28_1" + assert text1.position.x == pytest.approx(503.437, abs=0.001) + assert text1.position.y == pytest.approx(249.894, abs=0.001) + + +def test_to_points_annotation(detection2d): + """Test points annotation generation for bbox corners.""" + points_annotations = detection2d.to_points_annotation() + + assert len(points_annotations) == 1, "Should have one points annotation" + points = points_annotations[0] + + # Actually has 4 points forming a LINE_LOOP + assert points.points_length == 4 + assert points.type == 2 # LINE_LOOP + + x1, y1, x2, y2 = detection2d.bbox + expected_corners = [ + (x1, y1), # Top-left + (x1, y2), # Bottom-left + (x2, y2), # Bottom-right + (x2, y1), # Top-right + ] + + for i, (expected_x, expected_y) in enumerate(expected_corners): + point = points.points[i] + assert point.x == pytest.approx(expected_x, abs=0.001) + assert point.y == pytest.approx(expected_y, abs=0.001) + + +def test_to_image_annotations(detection2d): + """Test complete image annotations generation.""" + annotations = detection2d.to_image_annotations() + + assert annotations is not None + assert hasattr(annotations, "points") + assert hasattr(annotations, "texts") + + # Has 
1 points annotation and 2 text annotations + assert annotations.points_length == 1 + assert annotations.texts_length == 2 + + +def test_to_repr_dict(detection2d): + """Test dictionary representation for display.""" + repr_dict = detection2d.to_repr_dict() + + assert "name" in repr_dict + assert "class" in repr_dict + assert "track" in repr_dict + assert "conf" in repr_dict + assert "bbox" in repr_dict + + # Verify string formatting + assert repr_dict["class"] == str(detection2d.class_id) + assert repr_dict["track"] == str(detection2d.track_id) + assert repr_dict["conf"] == f"{detection2d.confidence:.2f}" + + +def test_image_detections2d_properties(image_detections2d): + """Test ImageDetections2D container properties.""" + assert hasattr(image_detections2d, "detections") + assert hasattr(image_detections2d, "image") + assert hasattr(image_detections2d, "ts") + + # Check that all detections share the same image and timestamp + for det in image_detections2d.detections: + assert det.image is image_detections2d.image + assert det.ts == image_detections2d.ts + + +def test_ros_detection2d_conversion(detection2d): + """Test conversion to ROS Detection2D message.""" + ros_det = detection2d.to_ros_detection2d() + + assert ros_det is not None + assert hasattr(ros_det, "header") + assert hasattr(ros_det, "results") + assert hasattr(ros_det, "bbox") + assert hasattr(ros_det, "id") + + # Check header + assert ros_det.header.stamp.sec > 0 + assert ros_det.header.frame_id == "camera_link" + + # Check detection ID + assert ros_det.id == str(detection2d.track_id) + + # Currently results_length is 0 in the implementation + assert ros_det.results_length == 0 + + # Check bbox is set + assert ros_det.bbox is not None diff --git a/dimos/perception/detection2d/type/test_detection3d.py b/dimos/perception/detection2d/type/test_detection3d.py new file mode 100644 index 0000000000..b8f67f1585 --- /dev/null +++ b/dimos/perception/detection2d/type/test_detection3d.py @@ -0,0 +1,152 @@ +# 
Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest + +from dimos.perception.detection2d import testing + + +@pytest.fixture(scope="session") +def detection3d(): + """Fixture to load and provide a 3D detection instance for testing.""" + moment: testing.Moment3D = testing.detections3d() + detections = moment["detections3d"] + assert detections, "No detections found in test data" + return detections[0] + + +def test_oriented_bounding_box(detection3d): + """Test oriented bounding box calculation and values.""" + obb = detection3d.get_oriented_bounding_box() + assert obb is not None, "Oriented bounding box should not be None" + + # Verify OBB center values + assert obb.center[0] == pytest.approx(-3.36002, abs=0.1) + assert obb.center[1] == pytest.approx(-0.196446, abs=0.1) + assert obb.center[2] == pytest.approx(0.106373, abs=0.1) + + # Verify OBB extent values + assert obb.extent[0] == pytest.approx(0.714664, abs=0.1) + assert obb.extent[1] == pytest.approx(0.461054, abs=0.1) + assert obb.extent[2] == pytest.approx(0.407777, abs=0.1) + + +def test_bounding_box_dimensions(detection3d): + """Test bounding box dimension calculation.""" + dims = detection3d.get_bounding_box_dimensions() + assert len(dims) == 3, "Bounding box dimensions should have 3 values" + assert dims[0] == pytest.approx(0.500, abs=0.1) + assert dims[1] == pytest.approx(0.550, abs=0.1) + assert dims[2] == pytest.approx(0.550, abs=0.1) + + 
+def test_axis_aligned_bounding_box(detection3d): + """Test axis-aligned bounding box calculation.""" + aabb = detection3d.get_bounding_box() + assert aabb is not None, "Axis-aligned bounding box should not be None" + + # Verify AABB min values + assert aabb.min_bound[0] == pytest.approx(-3.575, abs=0.1) + assert aabb.min_bound[1] == pytest.approx(-0.375, abs=0.1) + assert aabb.min_bound[2] == pytest.approx(-0.075, abs=0.1) + + # Verify AABB max values + assert aabb.max_bound[0] == pytest.approx(-3.075, abs=0.1) + assert aabb.max_bound[1] == pytest.approx(0.175, abs=0.1) + assert aabb.max_bound[2] == pytest.approx(0.475, abs=0.1) + + +def test_point_cloud_properties(detection3d): + """Test point cloud data and boundaries.""" + pc_points = detection3d.pointcloud.points() + assert len(pc_points) == 94, f"Expected 94 points, got {len(pc_points)}" + assert detection3d.pointcloud.frame_id == "world", ( + f"Expected frame_id 'world', got '{detection3d.pointcloud.frame_id}'" + ) + + # Extract xyz coordinates from points + points = np.array([[pt[0], pt[1], pt[2]] for pt in pc_points]) + + min_pt = np.min(points, axis=0) + max_pt = np.max(points, axis=0) + center = np.mean(points, axis=0) + + # Verify point cloud boundaries + assert min_pt[0] == pytest.approx(-3.575, abs=0.1) + assert min_pt[1] == pytest.approx(-0.375, abs=0.1) + assert min_pt[2] == pytest.approx(-0.075, abs=0.1) + + assert max_pt[0] == pytest.approx(-3.075, abs=0.1) + assert max_pt[1] == pytest.approx(0.175, abs=0.1) + assert max_pt[2] == pytest.approx(0.475, abs=0.1) + + assert center[0] == pytest.approx(-3.326, abs=0.1) + assert center[1] == pytest.approx(-0.202, abs=0.1) + assert center[2] == pytest.approx(0.160, abs=0.1) + + +def test_foxglove_scene_entity_generation(detection3d): + """Test Foxglove scene entity creation and structure.""" + entity = detection3d.to_foxglove_scene_entity("test_entity_123") + + # Verify entity metadata + assert entity.id == "1", f"Expected entity ID '1', got 
'{entity.id}'" + assert entity.frame_id == "world", f"Expected frame_id 'world', got '{entity.frame_id}'" + assert entity.cubes_length == 1, f"Expected 1 cube, got {entity.cubes_length}" + assert entity.texts_length == 1, f"Expected 1 text, got {entity.texts_length}" + + +def test_foxglove_cube_properties(detection3d): + """Test Foxglove cube primitive properties.""" + entity = detection3d.to_foxglove_scene_entity("test_entity_123") + cube = entity.cubes[0] + + # Verify position + assert cube.pose.position.x == pytest.approx(-3.325, abs=0.1) + assert cube.pose.position.y == pytest.approx(-0.100, abs=0.1) + assert cube.pose.position.z == pytest.approx(0.200, abs=0.1) + + # Verify size + assert cube.size.x == pytest.approx(0.500, abs=0.1) + assert cube.size.y == pytest.approx(0.550, abs=0.1) + assert cube.size.z == pytest.approx(0.550, abs=0.1) + + # Verify color (green with alpha) + assert cube.color.r == pytest.approx(0.08235294117647059, abs=0.1) + assert cube.color.g == pytest.approx(0.7176470588235294, abs=0.1) + assert cube.color.b == pytest.approx(0.28627450980392155, abs=0.1) + assert cube.color.a == pytest.approx(0.2, abs=0.1) + + +def test_foxglove_text_label(detection3d): + """Test Foxglove text label properties.""" + entity = detection3d.to_foxglove_scene_entity("test_entity_123") + text = entity.texts[0] + + assert text.text == "1/suitcase (81%)", f"Expected text '1/suitcase (81%)', got '{text.text}'" + assert text.pose.position.x == pytest.approx(-3.325, abs=0.1) + assert text.pose.position.y == pytest.approx(-0.100, abs=0.1) + assert text.pose.position.z == pytest.approx(0.575, abs=0.1) + assert text.font_size == 20.0, f"Expected font size 20.0, got {text.font_size}" + + +def test_detection_pose(detection3d): + """Test detection pose and frame information.""" + assert detection3d.pose.x == pytest.approx(-3.327, abs=0.1) + assert detection3d.pose.y == pytest.approx(-0.202, abs=0.1) + assert detection3d.pose.z == pytest.approx(0.160, abs=0.1) + assert 
detection3d.pose.frame_id == "world", ( + f"Expected frame_id 'world', got '{detection3d.pose.frame_id}'" + ) diff --git a/dimos/perception/detection2d/type/test_detection3d_filters.py b/dimos/perception/detection2d/type/test_detection3d_filters.py new file mode 100644 index 0000000000..2daad4886f --- /dev/null +++ b/dimos/perception/detection2d/type/test_detection3d_filters.py @@ -0,0 +1,27 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time +from dimos.perception.detection2d import testing + + +def test_module3d(): + seek = 75 + moment = testing.detections3d(seek=seek, g1=True) + + print(moment.get("detections2d")) + print(moment.get("detections3d")) + + for i in range(3): + testing.publish_moment(moment) + time.sleep(0.1) diff --git a/dimos/perception/detection2d/type/test_object3d.py b/dimos/perception/detection2d/type/test_object3d.py new file mode 100644 index 0000000000..78e24be95c --- /dev/null +++ b/dimos/perception/detection2d/type/test_object3d.py @@ -0,0 +1,242 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from dimos.perception.detection2d import testing +from dimos.perception.detection2d.module2D import Detection2DModule +from dimos.perception.detection2d.module3D import Detection3DModule +from dimos.perception.detection2d.moduleDB import Object3D, ObjectDBModule +from dimos.perception.detection2d.type.detection3d import ImageDetections3D +from dimos.protocol.service import lcmservice as lcm +from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule + + +@pytest.fixture(scope="session", autouse=True) +def setup_lcm(): + """Configure LCM for the test session.""" + lcm.autoconf() + + +@pytest.fixture(scope="session") +def object_db_module(): + """Create and populate an ObjectDBModule with detections from multiple frames.""" + module2d = Detection2DModule() + module3d = Detection3DModule(camera_info=ConnectionModule._camera_info()) + moduleDB = ObjectDBModule( + camera_info=ConnectionModule._camera_info(), + goto=lambda obj_id: None, # No-op for testing + ) + + # Process 5 frames to build up object history + for i in range(5): + seek_value = 10.0 + (i * 2) + moment = testing.get_moment(seek=seek_value) + + # Process 2D detections + imageDetections2d = module2d.process_image_frame(moment["image_frame"]) + + # Get camera transform + camera_transform = moment["tf"].get("camera_optical", moment.get("lidar_frame").frame_id) + + # Process 3D detections + imageDetections3d = module3d.process_frame( + imageDetections2d, moment["lidar_frame"], camera_transform + ) + + # Add to database + 
moduleDB.add_detections(imageDetections3d) + + return moduleDB + + +@pytest.fixture(scope="session") +def first_object(object_db_module): + """Get the first object from the database.""" + objects = list(object_db_module.objects.values()) + assert len(objects) > 0, "No objects found in database" + return objects[0] + + +@pytest.fixture(scope="session") +def all_objects(object_db_module): + """Get all objects from the database.""" + return list(object_db_module.objects.values()) + + +def test_object_db_module_populated(object_db_module): + """Test that ObjectDBModule is properly populated.""" + assert len(object_db_module.objects) > 0, "Database should contain objects" + assert object_db_module.cnt > 0, "Object counter should be greater than 0" + + +def test_object_db_module_objects_structure(all_objects): + """Test the structure of objects in the database.""" + for obj in all_objects: + assert isinstance(obj, Object3D) + assert hasattr(obj, "track_id") + assert hasattr(obj, "detections") + assert hasattr(obj, "best_detection") + assert hasattr(obj, "center") + assert len(obj.detections) >= 1 + + +def test_object3d_properties(first_object): + """Test basic properties of an Object3D.""" + assert first_object.track_id is not None + assert isinstance(first_object.track_id, str) + assert first_object.name is not None + assert first_object.class_id >= 0 + assert 0.0 <= first_object.confidence <= 1.0 + assert first_object.ts > 0 + assert first_object.frame_id is not None + assert first_object.best_detection is not None + + +def test_object3d_multiple_detections(all_objects): + """Test objects that have been built from multiple detections.""" + # Find objects with multiple detections + multi_detection_objects = [obj for obj in all_objects if len(obj.detections) > 1] + + if multi_detection_objects: + obj = multi_detection_objects[0] + + # Test that confidence is the max of all detections + max_conf = max(d.confidence for d in obj.detections) + assert obj.confidence == 
max_conf + + # Test that timestamp is the max (most recent) + max_ts = max(d.ts for d in obj.detections) + assert obj.ts == max_ts + + # Test that best_detection has the largest bbox volume + best_volume = obj.best_detection.bbox_2d_volume() + for det in obj.detections: + assert det.bbox_2d_volume() <= best_volume + + +def test_object3d_center(first_object): + """Test Object3D center calculation.""" + assert first_object.center is not None + assert hasattr(first_object.center, "x") + assert hasattr(first_object.center, "y") + assert hasattr(first_object.center, "z") + + # Center should be within reasonable bounds + assert -10 < first_object.center.x < 10 + assert -10 < first_object.center.y < 10 + assert -10 < first_object.center.z < 10 + + +def test_object3d_repr_dict(first_object): + """Test to_repr_dict method.""" + repr_dict = first_object.to_repr_dict() + + assert "object_id" in repr_dict + assert "detections" in repr_dict + assert "center" in repr_dict + + assert repr_dict["object_id"] == first_object.track_id + assert repr_dict["detections"] == len(first_object.detections) + + # Center should be formatted as string with coordinates + assert isinstance(repr_dict["center"], str) + assert repr_dict["center"].startswith("[") + assert repr_dict["center"].endswith("]") + + +def test_object3d_scene_entity_label(first_object): + """Test scene entity label generation.""" + label = first_object.scene_entity_label() + + assert isinstance(label, str) + assert first_object.name in label + assert f"({len(first_object.detections)})" in label + + +def test_object3d_agent_encode(first_object): + """Test agent encoding.""" + encoded = first_object.agent_encode() + + assert isinstance(encoded, dict) + assert "id" in encoded + assert "name" in encoded + assert "detections" in encoded + assert "last_seen" in encoded + + assert encoded["id"] == first_object.track_id + assert encoded["name"] == first_object.name + assert encoded["detections"] == len(first_object.detections) + 
assert encoded["last_seen"].endswith("s ago") + + +def test_object3d_image_property(first_object): + """Test image property returns best_detection's image.""" + assert first_object.image is not None + assert first_object.image is first_object.best_detection.image + + +def test_object3d_addition(object_db_module): + """Test Object3D addition operator.""" + # Get existing objects from the database + objects = list(object_db_module.objects.values()) + if len(objects) < 2: + pytest.skip("Not enough objects in database") + + # Get detections from two different objects + det1 = objects[0].best_detection + det2 = objects[1].best_detection + + # Create a new object with the first detection + obj = Object3D("test_track_combined", det1) + + # Add the second detection from a different object + combined = obj + det2 + + assert combined.track_id == "test_track_combined" + assert len(combined.detections) == 2 + + # The combined object should have properties from both detections + assert det1 in combined.detections + assert det2 in combined.detections + + # Best detection should be determined by the Object3D logic + assert combined.best_detection is not None + + # Center should be valid (no specific value check since we're using real detections) + assert hasattr(combined, "center") + assert combined.center is not None + + +def test_image_detections3d_scene_update(object_db_module): + """Test ImageDetections3D to Foxglove scene update conversion.""" + # Get some detections + objects = list(object_db_module.objects.values()) + if not objects: + pytest.skip("No objects in database") + + detections = [obj.best_detection for obj in objects[:3]] # Take up to 3 + + image_detections = ImageDetections3D(image=detections[0].image, detections=detections) + + scene_update = image_detections.to_foxglove_scene_update() + + assert scene_update is not None + assert scene_update.entities_length == len(detections) + + for i, entity in enumerate(scene_update.entities): + assert entity.id == 
str(detections[i].track_id) + assert entity.frame_id == detections[i].frame_id + assert entity.cubes_length == 1 + assert entity.texts_length == 1 diff --git a/dimos/perception/detection2d/yolo_2d_det.py b/dimos/perception/detection2d/yolo_2d_det.py index b9b04165cd..2a53f3ff89 100644 --- a/dimos/perception/detection2d/yolo_2d_det.py +++ b/dimos/perception/detection2d/yolo_2d_det.py @@ -26,7 +26,6 @@ from dimos.utils.data import get_data from dimos.utils.gpu_utils import is_cuda_available from dimos.utils.logging_config import setup_logger -from dimos.utils.path_utils import get_project_root logger = setup_logger("dimos.perception.detection2d.yolo_2d_det") @@ -42,7 +41,7 @@ def __init__(self, model_path="models_yolo", model_name="yolo11n.onnx", device=" device (str): Device to run inference on ('cuda' or 'cpu') """ self.device = device - self.model = YOLO(get_data(model_path) / model_name) + self.model = YOLO(get_data(model_path) / model_name, task="detect") module_dir = os.path.dirname(__file__) self.tracker_config = os.path.join(module_dir, "config", "custom_tracker.yaml") @@ -50,10 +49,10 @@ def __init__(self, model_path="models_yolo", model_name="yolo11n.onnx", device=" if hasattr(onnxruntime, "preload_dlls"): # Handles CUDA 11 / onnxruntime-gpu<=1.18 onnxruntime.preload_dlls(cuda=True, cudnn=True) self.device = "cuda" - logger.info("Using CUDA for YOLO 2d detector") + logger.debug("Using CUDA for YOLO 2d detector") else: self.device = "cpu" - logger.info("Using CPU for YOLO 2d detector") + logger.debug("Using CPU for YOLO 2d detector") def process_image(self, image): """ diff --git a/dimos/protocol/tf/tf.py b/dimos/protocol/tf/tf.py index 9d1db5ed16..ec31c8e5eb 100644 --- a/dimos/protocol/tf/tf.py +++ b/dimos/protocol/tf/tf.py @@ -317,6 +317,18 @@ def publish(self, *args: Transform) -> None: def publish_static(self, *args: Transform) -> None: raise NotImplementedError("Static transforms not implemented in PubSubTF.") + def publish_all(self) -> None: + 
"""Publish all transforms currently stored in all buffers.""" + all_transforms = [] + for buffer in self.buffers.values(): + # Get the latest transform from each buffer + latest = buffer.get() # get() with no args returns latest + if latest: + all_transforms.append(latest) + + if all_transforms: + self.publish(*all_transforms) + def get( self, parent_frame: str, diff --git a/dimos/robot/nav_bot.py b/dimos/robot/nav_bot.py new file mode 100644 index 0000000000..c8359c9a1c --- /dev/null +++ b/dimos/robot/nav_bot.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +NavBot class for navigation-related functionality. +Encapsulates ROS bridge and topic remapping for Unitree robots. 
+""" + +import logging +import time + +from dimos import core +from dimos.core import Module, In, Out, rpc +from dimos.msgs.geometry_msgs import PoseStamped, TwistStamped, Transform, Vector3 +from dimos.msgs.nav_msgs import Odometry +from dimos.msgs.sensor_msgs import PointCloud2, Joy, Image +from dimos.msgs.std_msgs import Bool +from dimos.msgs.tf2_msgs.TFMessage import TFMessage +from dimos.protocol.tf import TF +from dimos.robot.ros_bridge import ROSBridge, BridgeDirection +from dimos.utils.transform_utils import euler_to_quaternion +from geometry_msgs.msg import TwistStamped as ROSTwistStamped +from geometry_msgs.msg import PoseStamped as ROSPoseStamped +from nav_msgs.msg import Odometry as ROSOdometry +from sensor_msgs.msg import PointCloud2 as ROSPointCloud2, Joy as ROSJoy, Image as ROSImage +from std_msgs.msg import Bool as ROSBool +from tf2_msgs.msg import TFMessage as ROSTFMessage +from dimos.utils.logging_config import setup_logger +from dimos.protocol.pubsub.lcmpubsub import LCM, Topic + +logger = setup_logger("dimos.robot.unitree_webrtc.nav_bot", level=logging.INFO) + +############################################################ +# Navigation Module + +# first run unitree_g1.py to start the ROS bridge and webrtc connection and teleop +# python dimos/robot/unitree_webrtc/unitree_g1.py + + +# then deploy this module in any other run file. 
+############################################################ +class NavigationModule(Module): + goal_reached: In[Bool] = None + + goal_pose: Out[PoseStamped] = None + cancel_goal: Out[Bool] = None + joy: Out[Joy] = None + + def __init__(self, *args, **kwargs): + """Initialize NavigationModule.""" + Module.__init__(self, *args, **kwargs) + self.goal_reach = None + + @rpc + def start(self): + """Start the navigation module.""" + if self.goal_reached: + self.goal_reached.subscribe(self._on_goal_reached) + logger.info("NavigationModule started") + + def _on_goal_reached(self, msg: Bool): + """Handle goal reached status messages.""" + self.goal_reach = msg.data + + def _set_autonomy_mode(self): + """ + Set autonomy mode by publishing Joy message. + """ + + joy_msg = Joy( + frame_id="dimos", + axes=[ + 0.0, # axis 0 + 0.0, # axis 1 + -1.0, # axis 2 + 0.0, # axis 3 + 1.0, # axis 4 + 1.0, # axis 5 + 0.0, # axis 6 + 0.0, # axis 7 + ], + buttons=[ + 0, # button 0 + 0, # button 1 + 0, # button 2 + 0, # button 3 + 0, # button 4 + 0, # button 5 + 0, # button 6 + 1, # button 7 - controls autonomy mode + 0, # button 8 + 0, # button 9 + 0, # button 10 + ], + ) + + if self.joy: + self.joy.publish(joy_msg) + logger.info(f"Setting autonomy mode via Joy message") + + @rpc + def go_to(self, pose: PoseStamped, timeout: float = 60.0) -> bool: + """ + Navigate to a target pose by publishing to LCM topics. 
+ + Args: + pose: Target pose to navigate to + blocking: If True, block until goal is reached + timeout: Maximum time to wait for goal (seconds) + + Returns: + True if navigation was successful (or started if non-blocking) + """ + logger.info( + f"Navigating to goal: ({pose.position.x:.2f}, {pose.position.y:.2f}, {pose.position.z:.2f})" + ) + + self.goal_reach = None + self._set_autonomy_mode() + self.goal_pose.publish(pose) + + start_time = time.time() + while time.time() - start_time < timeout: + if self.goal_reach is not None: + return self.goal_reach + time.sleep(0.1) + + self.stop() + + logger.warning(f"Navigation timed out after {timeout} seconds") + return False + + @rpc + def stop(self) -> bool: + """ + Cancel current navigation by publishing to cancel_goal. + + Returns: + True if cancel command was sent successfully + """ + logger.info("Cancelling navigation") + + if self.cancel_goal: + cancel_msg = Bool(data=True) + self.cancel_goal.publish(cancel_msg) + return True + + return False + + +class TopicRemapModule(Module): + """Module that remaps Odometry to PoseStamped and publishes static transforms.""" + + odom: In[Odometry] = None + odom_pose: Out[PoseStamped] = None + + def __init__(self, sensor_to_base_link_transform=None, *args, **kwargs): + Module.__init__(self, *args, **kwargs) + self.tf = TF() + self.sensor_to_base_link_transform = sensor_to_base_link_transform or [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ] + + @rpc + def start(self): + self.odom.subscribe(self._publish_odom_pose) + logger.info("TopicRemapModule started") + + def _publish_odom_pose(self, msg: Odometry): + pose_msg = PoseStamped( + ts=msg.ts, + frame_id=msg.frame_id, + position=msg.pose.pose.position, + orientation=msg.pose.pose.orientation, + ) + self.odom_pose.publish(pose_msg) + + # Publish static transform from sensor to base_link + translation = Vector3( + self.sensor_to_base_link_transform[0], + self.sensor_to_base_link_transform[1], + self.sensor_to_base_link_transform[2], 
+ ) + euler_angles = Vector3( + self.sensor_to_base_link_transform[3], + self.sensor_to_base_link_transform[4], + self.sensor_to_base_link_transform[5], + ) + rotation = euler_to_quaternion(euler_angles) + + sensor_to_base_link_tf = Transform( + translation=translation, + rotation=rotation, + frame_id="sensor", + child_frame_id="base_link", + ts=msg.ts, + ) + + # map to world static transform + map_to_world_tf = Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=euler_to_quaternion(Vector3(0.0, 0.0, 0.0)), + frame_id="map", + child_frame_id="world", + ts=msg.ts, + ) + + self.tf.publish(sensor_to_base_link_tf, map_to_world_tf) + + +class NavBot: + """ + NavBot class for navigation-related functionality. + Manages ROS bridge and topic remapping for navigation. + """ + + def __init__(self, dimos=None, sensor_to_base_link_transform=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]): + """ + Initialize NavBot. + + Args: + dimos: DIMOS instance (creates new one if None) + sensor_to_base_link_transform: Optional [x, y, z, roll, pitch, yaw] transform from sensor to base_link + """ + if dimos is None: + self.dimos = core.start(2) + else: + self.dimos = dimos + + self.sensor_to_base_link_transform = sensor_to_base_link_transform + self.ros_bridge = None + self.topic_remap_module = None + self.tf = TF() + self.lcm = LCM() + + def start(self): + if self.topic_remap_module: + self.topic_remap_module.start() + logger.info("Topic remap module started") + + if self.ros_bridge: + logger.info("ROS bridge started") + + def deploy_navigation_modules(self, bridge_name="nav_bot_ros_bridge"): + # Deploy topic remap module + logger.info("Deploying topic remap module...") + self.topic_remap_module = self.dimos.deploy( + TopicRemapModule, sensor_to_base_link_transform=self.sensor_to_base_link_transform + ) + self.topic_remap_module.odom.transport = core.LCMTransport("/odom", Odometry) + self.topic_remap_module.odom_pose.transport = core.LCMTransport("/odom_pose", PoseStamped) + + # Deploy ROS bridge 
+ logger.info("Deploying ROS bridge...") + self.ros_bridge = ROSBridge(bridge_name) + + # Configure ROS topics + self.ros_bridge.add_topic( + "/cmd_vel", TwistStamped, ROSTwistStamped, direction=BridgeDirection.ROS_TO_DIMOS + ) + self.ros_bridge.add_topic( + "/state_estimation", + Odometry, + ROSOdometry, + direction=BridgeDirection.ROS_TO_DIMOS, + remap_topic="/odom", + ) + self.ros_bridge.add_topic( + "/tf", TFMessage, ROSTFMessage, direction=BridgeDirection.ROS_TO_DIMOS + ) + self.ros_bridge.add_topic( + "/registered_scan", PointCloud2, ROSPointCloud2, direction=BridgeDirection.ROS_TO_DIMOS + ) + self.ros_bridge.add_topic("/joy", Joy, ROSJoy, direction=BridgeDirection.DIMOS_TO_ROS) + # Navigation control topics from autonomy stack + self.ros_bridge.add_topic( + "/goal_pose", PoseStamped, ROSPoseStamped, direction=BridgeDirection.DIMOS_TO_ROS + ) + self.ros_bridge.add_topic( + "/cancel_goal", Bool, ROSBool, direction=BridgeDirection.DIMOS_TO_ROS + ) + self.ros_bridge.add_topic( + "/goal_reached", Bool, ROSBool, direction=BridgeDirection.ROS_TO_DIMOS + ) + + # self.ros_bridge.add_topic( + # "/camera/image", Image, ROSImage, direction=BridgeDirection.ROS_TO_DIMOS + # ) + + def _set_autonomy_mode(self): + """ + Set autonomy mode by publishing Joy message. + """ + + joy_msg = Joy( + frame_id="dimos", + axes=[ + 0.0, # axis 0 + 0.0, # axis 1 + -1.0, # axis 2 + 0.0, # axis 3 + 1.0, # axis 4 + 1.0, # axis 5 + 0.0, # axis 6 + 0.0, # axis 7 + ], + buttons=[ + 0, # button 0 + 0, # button 1 + 0, # button 2 + 0, # button 3 + 0, # button 4 + 0, # button 5 + 0, # button 6 + 1, # button 7 - controls autonomy mode + 0, # button 8 + 0, # button 9 + 0, # button 10 + ], + ) + + self.lcm.publish(Topic("/joy", Joy), joy_msg) + + def navigate_to_goal(self, pose: PoseStamped, blocking: bool = True, timeout: float = 30.0): + """Navigate to a target pose using ROS topics. + + Args: + pose: Target pose to navigate to + blocking: If True, block until goal is reached. 
If False, return immediately. + timeout: Maximum time to wait for goal to be reached (seconds) + + Returns: + If blocking=True: True if navigation was successful, False otherwise + If blocking=False: True if goal was sent successfully + """ + logger.info( + f"Navigating to goal: ({pose.position.x:.2f}, {pose.position.y:.2f}, {pose.position.z:.2f})" + ) + + # Publish goal to /goal_pose topic + self._set_autonomy_mode() + goal_topic = Topic("/goal_pose", PoseStamped) + self.lcm.publish(goal_topic, pose) + + if not blocking: + return True + + # Wait for goal_reached signal + goal_reached_topic = Topic("/goal_reached", Bool) + start_time = time.time() + + while time.time() - start_time < timeout: + try: + msg = self.lcm.wait_for_message(goal_reached_topic, timeout=0.5) + if msg and msg.data: + logger.info("Navigation goal reached") + return True + elif msg and not msg.data: + logger.info("Navigation was cancelled or failed") + return False + except Exception: + # Timeout on wait_for_message, continue looping + pass + + logger.warning(f"Navigation timed out after {timeout} seconds") + return False + + def cancel_navigation(self) -> bool: + """Cancel the current navigation goal using ROS topics. 
+ + Returns: + True if cancel command was sent successfully + """ + logger.info("Cancelling navigation goal") + + # Publish cancel command to /cancel_goal topic + cancel_topic = Topic("/cancel_goal", Bool) + cancel_msg = Bool(data=True) + self.lcm.publish(cancel_topic, cancel_msg) + + return True + + def shutdown(self): + logger.info("Shutting down navigation modules...") + + if self.ros_bridge is not None: + try: + self.ros_bridge.shutdown() + logger.info("ROS bridge shut down successfully") + except Exception as e: + logger.error(f"Error shutting down ROS bridge: {e}") diff --git a/dimos/robot/unitree_webrtc/modular/connection_module.py b/dimos/robot/unitree_webrtc/modular/connection_module.py index c6214c4f2c..6e13ed938e 100644 --- a/dimos/robot/unitree_webrtc/modular/connection_module.py +++ b/dimos/robot/unitree_webrtc/modular/connection_module.py @@ -18,6 +18,7 @@ import functools import logging import os +import queue import time import warnings from dataclasses import dataclass @@ -28,7 +29,8 @@ from reactivex import operators as ops from reactivex.observable import Observable -from dimos.core import In, LCMTransport, Module, ModuleConfig, Out, rpc, DimosCluster +from dimos.agents2 import Agent, Output, Reducer, Stream, skill +from dimos.core import DimosCluster, In, LCMTransport, Module, ModuleConfig, Out, rpc from dimos.msgs.foxglove_msgs import ImageAnnotations from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Twist, Vector3 from dimos.msgs.sensor_msgs.Image import Image, sharpness_window @@ -103,9 +105,7 @@ def odom_stream(self): @functools.cache def video_stream(self): print("video stream start") - video_store = TimedSensorReplay( - f"{self.dir_name}/video", - ) + video_store = TimedSensorReplay(f"{self.dir_name}/video") return video_store.stream(**self.replay_config) @@ -136,11 +136,27 @@ class ConnectionModule(Module): default_config = ConnectionModuleConfig + # mega temporary, skill should have a limit decorator for number of 
+ # parallel calls + video_running: bool = False + def __init__(self, connection_type: str = "webrtc", *args, **kwargs): self.connection_config = kwargs self.connection_type = connection_type Module.__init__(self, *args, **kwargs) + @skill(stream=Stream.passive, output=Output.image, reducer=Reducer.latest) + def video_stream_tool(self) -> Image: + """implicit video stream skill, don't call this directly""" + if self.video_running: + return "video stream already running" + self.video_running = True + _queue = queue.Queue(maxsize=1) + self.connection.video_stream().subscribe(_queue.put) + + for image in iter(_queue.get, None): + yield image + @rpc def record(self, recording_name: str): lidar_store: TimedSensorStorage = TimedSensorStorage(f"{recording_name}/lidar") @@ -183,10 +199,8 @@ def resize(image: Image) -> Image: int(originalwidth / image_resize_factor), int(originalheight / image_resize_factor) ) - sharpness = sharpness_window(10, self.connection.video_stream()) - sharpness.subscribe(self.video.publish) - # self.connection.video_stream().subscribe(self.video.publish) - + self.connection.video_stream().subscribe(self.video.publish) + # sharpness_window(15.0, self.connection.video_stream()).subscribe(self.video.publish) # self.connection.video_stream().pipe(ops.map(resize)).subscribe(self.video.publish) self.camera_info_stream().subscribe(self.camera_info.publish) self.movecmd.subscribe(self.connection.move) @@ -278,8 +292,8 @@ def camera_info_stream(self) -> Observable[CameraInfo]: def deploy_connection(dimos: DimosCluster, **kwargs): - # foxglove_bridge = dimos.deploy(FoxgloveBridge) - # foxglove_bridge.start() + foxglove_bridge = dimos.deploy(FoxgloveBridge) + foxglove_bridge.start() connection = dimos.deploy( ConnectionModule, diff --git a/dimos/robot/unitree_webrtc/modular/ivan_unitree.py b/dimos/robot/unitree_webrtc/modular/ivan_unitree.py index 5893248530..73927cf248 100644 --- a/dimos/robot/unitree_webrtc/modular/ivan_unitree.py +++ 
b/dimos/robot/unitree_webrtc/modular/ivan_unitree.py @@ -16,7 +16,9 @@ import time from dimos_lcm.sensor_msgs import CameraInfo +from lcm_msgs.foxglove_msgs import SceneUpdate +from dimos.agents2.spec import Model, Provider from dimos.core import LCMTransport, start # from dimos.msgs.detection2d import Detection2DArray @@ -24,6 +26,7 @@ from dimos.msgs.sensor_msgs import Image, PointCloud2 from dimos.msgs.vision_msgs import Detection2DArray from dimos.perception.detection2d import Detection3DModule +from dimos.perception.detection2d.moduleDB import ObjectDBModule from dimos.protocol.pubsub import lcm from dimos.robot.unitree_webrtc.modular import deploy_connection, deploy_navigation from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule @@ -34,15 +37,22 @@ def detection_unitree(): dimos = start(6) - connection = deploy_connection(dimos) - connection.start() - # connection.record("unitree_go2_office_walk2") # mapper = deploy_navigation(dimos, connection) + # mapper.start() + + def goto(pose): + print("NAVIGATION REQUESTED:", pose) + return True - module3D = dimos.deploy(Detection3DModule, camera_info=ConnectionModule._camera_info()) + module3D = dimos.deploy( + ObjectDBModule, + goto=goto, + camera_info=ConnectionModule._camera_info(), + ) module3D.image.connect(connection.video) + # module3D.pointcloud.connect(mapper.global_map) module3D.pointcloud.connect(connection.lidar) module3D.annotations.transport = LCMTransport("/annotations", ImageAnnotations) @@ -55,16 +65,37 @@ def detection_unitree(): module3D.detected_image_0.transport = LCMTransport("/detected/image/0", Image) module3D.detected_image_1.transport = LCMTransport("/detected/image/1", Image) module3D.detected_image_2.transport = LCMTransport("/detected/image/2", Image) + + module3D.scene_update.transport = LCMTransport("/scene_update", SceneUpdate) + module3D.start() - # detection.start() + connection.start() + + from dimos.agents2 import Agent, Output, Reducer, Stream, skill + 
from dimos.agents2.cli.human import HumanInput + + agent = Agent( + system_prompt="You are a helpful assistant for controlling a Unitree Go2 robot. ", + model=Model.GPT_4O, # Could add CLAUDE models to enum + provider=Provider.OPENAI, # Would need ANTHROPIC provider + ) + + human_input = dimos.deploy(HumanInput) + agent.register_skills(human_input) + # agent.register_skills(connection) + agent.register_skills(module3D) + + # agent.run_implicit_skill("video_stream_tool") + agent.run_implicit_skill("human") + + agent.start() + agent.loop_thread() try: while True: time.sleep(1) except KeyboardInterrupt: connection.stop() - # mapper.stop() - # detection.stop() logger.info("Shutting down...") diff --git a/dimos/robot/unitree_webrtc/modular/navigation.py b/dimos/robot/unitree_webrtc/modular/navigation.py index 8ceaf0e195..c37cac700a 100644 --- a/dimos/robot/unitree_webrtc/modular/navigation.py +++ b/dimos/robot/unitree_webrtc/modular/navigation.py @@ -27,7 +27,7 @@ def deploy_navigation(dimos, connection): - mapper = dimos.deploy(Map, voxel_size=0.5, cost_resolution=0.05, global_publish_interval=1.0) + mapper = dimos.deploy(Map, voxel_size=0.5, cost_resolution=0.05, global_publish_interval=0.5) mapper.lidar.connect(connection.lidar) mapper.global_map.transport = LCMTransport("/global_map", LidarMessage) mapper.global_costmap.transport = LCMTransport("/global_costmap", OccupancyGrid) diff --git a/dimos/robot/unitree_webrtc/rosnav.py b/dimos/robot/unitree_webrtc/rosnav.py new file mode 100644 index 0000000000..487f1c1f71 --- /dev/null +++ b/dimos/robot/unitree_webrtc/rosnav.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import time + +from dimos import core +from dimos.core import Module, In, Out, rpc +from dimos.msgs.geometry_msgs import PoseStamped, TwistStamped, Transform, Vector3 +from dimos.msgs.nav_msgs import Odometry +from dimos.msgs.sensor_msgs import PointCloud2, Joy +from dimos.msgs.std_msgs import Bool, Int8 +from dimos.protocol.tf import TF +from dimos.robot.ros_bridge import ROSBridge, BridgeDirection +from dimos.utils.transform_utils import euler_to_quaternion +from geometry_msgs.msg import TwistStamped as ROSTwistStamped +from geometry_msgs.msg import PoseStamped as ROSPoseStamped +from nav_msgs.msg import Odometry as ROSOdometry +from sensor_msgs.msg import PointCloud2 as ROSPointCloud2 +from std_msgs.msg import Bool as ROSBool +from tf2_msgs.msg import TFMessage as ROSTFMessage +from dimos.utils.logging_config import setup_logger +from dimos.protocol.pubsub.lcmpubsub import LCM, Topic + +logger = setup_logger("dimos.robot.unitree_webrtc.nav_bot", level=logging.INFO) + + +class NavigationModule(Module): + goal_pose: Out[PoseStamped] = None + goal_reached: In[Bool] = None + cancel_goal: Out[Bool] = None + soft_stop: Out[Int8] = None + joy: Out[Joy] = None + + def __init__(self, *args, **kwargs): + """Initialize NavigationModule.""" + Module.__init__(self, *args, **kwargs) + self.goal_reach = None + + @rpc + def start(self): + """Start the navigation module.""" + if self.goal_reached: + self.goal_reached.subscribe(self._on_goal_reached) + logger.info("NavigationModule started") + + def _on_goal_reached(self, msg: Bool): + """Handle goal 
reached status messages.""" + self.goal_reach = msg.data + + def _set_autonomy_mode(self): + """ + Set autonomy mode by publishing Joy message. + """ + + joy_msg = Joy( + frame_id="dimos", + axes=[ + 0.0, # axis 0 + 0.0, # axis 1 + -1.0, # axis 2 + 0.0, # axis 3 + 1.0, # axis 4 + 1.0, # axis 5 + 0.0, # axis 6 + 0.0, # axis 7 + ], + buttons=[ + 0, # button 0 + 0, # button 1 + 0, # button 2 + 0, # button 3 + 0, # button 4 + 0, # button 5 + 0, # button 6 + 1, # button 7 - controls autonomy mode + 0, # button 8 + 0, # button 9 + 0, # button 10 + ], + ) + + if self.joy: + self.joy.publish(joy_msg) + logger.info(f"Setting autonomy mode via Joy message") + + @rpc + def go_to(self, pose: PoseStamped, timeout: float = 60.0) -> bool: + """ + Navigate to a target pose by publishing to LCM topics. + + Args: + pose: Target pose to navigate to + blocking: If True, block until goal is reached + timeout: Maximum time to wait for goal (seconds) + + Returns: + True if navigation was successful (or started if non-blocking) + """ + logger.info( + f"Navigating to goal: ({pose.position.x:.2f}, {pose.position.y:.2f}, {pose.position.z:.2f})" + ) + + self.goal_reach = None + self._set_autonomy_mode() + self.soft_stop.publish(Int8(0)) + self.goal_pose.publish(pose) + time.sleep(0.2) + self.goal_pose.publish(pose) + + start_time = time.time() + while time.time() - start_time < timeout: + if self.goal_reach is not None: + self.soft_stop.publish(Int8(2)) + return self.goal_reach + time.sleep(0.1) + + self.stop() + + logger.warning(f"Navigation timed out after {timeout} seconds") + return False + + @rpc + def stop(self) -> bool: + """ + Cancel current navigation by publishing to cancel_goal. 
+ + Returns: + True if cancel command was sent successfully + """ + logger.info("Cancelling navigation") + + if self.cancel_goal: + cancel_msg = Bool(data=True) + self.cancel_goal.publish(cancel_msg) + self.soft_stop.publish(Int8(2)) + return True + + return False diff --git a/dimos/robot/unitree_webrtc/unitree_b1/test_connection.py b/dimos/robot/unitree_webrtc/unitree_b1/test_connection.py index c54f124f02..e595a1adde 100644 --- a/dimos/robot/unitree_webrtc/unitree_b1/test_connection.py +++ b/dimos/robot/unitree_webrtc/unitree_b1/test_connection.py @@ -271,7 +271,7 @@ def test_watchdog_timing_accuracy(self): # Check timing (should be close to 200ms + up to 50ms watchdog interval) elapsed = timeout_time - start_time print(f"\nWatchdog timeout occurred at exactly {elapsed:.3f} seconds") - assert 0.19 <= elapsed <= 0.26, f"Watchdog timed out at {elapsed:.3f}s, expected ~0.2-0.25s" + assert 0.19 <= elapsed <= 0.3, f"Watchdog timed out at {elapsed:.3f}s, expected ~0.2-0.25s" conn.running = False conn.watchdog_running = False diff --git a/dimos/robot/unitree_webrtc/unitree_g1.py b/dimos/robot/unitree_webrtc/unitree_g1.py index 18f9f95ae0..71b703606b 100644 --- a/dimos/robot/unitree_webrtc/unitree_g1.py +++ b/dimos/robot/unitree_webrtc/unitree_g1.py @@ -18,42 +18,68 @@ Minimal implementation using WebRTC connection for robot control. 
""" +import logging import os import time -import logging from typing import Optional +from dimos import core +from dimos.agents2 import Agent +from dimos.agents2.cli.human import HumanInput +from dimos.agents2.skills.ros_navigation import RosNavigation +from dimos.agents2.spec import Model, Provider +from dimos.core import In, Module, Out, rpc +from geometry_msgs.msg import PoseStamped as ROSPoseStamped + +from geometry_msgs.msg import PoseStamped as ROSPoseStamped +from geometry_msgs.msg import TwistStamped as ROSTwistStamped + +from dimos_lcm.foxglove_msgs import SceneUpdate +from nav_msgs.msg import Odometry as ROSOdometry +from std_msgs.msg import Int8 as ROSInt8 +from sensor_msgs.msg import PointCloud2 as ROSPointCloud2, Joy as ROSJoy, Image as ROSImage +from tf2_msgs.msg import TFMessage as ROSTFMessage from dimos import core -from dimos.core import Module, In, Out, rpc -from dimos.msgs.geometry_msgs import PoseStamped, Twist, TwistStamped +from dimos.core import In, Module, Out, rpc +from dimos.hardware.camera import zed +from dimos.hardware.camera.module import CameraModule +from dimos.hardware.camera.webcam import Webcam +from dimos.msgs.foxglove_msgs import ImageAnnotations +from dimos.msgs.geometry_msgs import ( + PoseStamped, + Quaternion, + Transform, + Twist, + TwistStamped, + Vector3, +) from dimos.msgs.nav_msgs.Odometry import Odometry -from dimos.msgs.sensor_msgs import Image, CameraInfo, PointCloud2 +from dimos.msgs.sensor_msgs import CameraInfo, Image, Joy, PointCloud2 +from dimos.msgs.std_msgs import Bool, Int8 from dimos.msgs.tf2_msgs.TFMessage import TFMessage +from dimos.msgs.vision_msgs import Detection2DArray +from dimos.perception.detection2d import Detection3DModule +from dimos.perception.detection2d.moduleDB import ObjectDBModule +from dimos.core import Module, In, Out, rpc +from dimos.msgs.geometry_msgs import PoseStamped, Twist, TwistStamped, Vector3, Quaternion +from dimos.msgs.sensor_msgs import Image, CameraInfo +from 
dimos.perception.spatial_perception import SpatialMemory from dimos.protocol import pubsub from dimos.protocol.pubsub.lcmpubsub import LCM from dimos.robot.foxglove_bridge import FoxgloveBridge -from dimos.web.websocket_vis.websocket_vis_module import WebsocketVisModule +from dimos.robot.robot import Robot +from dimos.robot.ros_bridge import BridgeDirection, ROSBridge from dimos.robot.unitree_webrtc.connection import UnitreeWebRTCConnection +from dimos.robot.unitree_webrtc.rosnav import NavigationModule +from dimos.robot.unitree_webrtc.unitree_g1_skill_container import UnitreeG1SkillContainer from dimos.robot.unitree_webrtc.unitree_skills import MyUnitreeSkills -from dimos.robot.ros_bridge import ROSBridge, BridgeDirection -from geometry_msgs.msg import TwistStamped as ROSTwistStamped -from nav_msgs.msg import Odometry as ROSOdometry -from sensor_msgs.msg import PointCloud2 as ROSPointCloud2 -from tf2_msgs.msg import TFMessage as ROSTFMessage from dimos.skills.skills import SkillLibrary -from dimos.robot.robot import Robot - from dimos.types.robot_capabilities import RobotCapability from dimos.utils.logging_config import setup_logger +from dimos.web.websocket_vis.websocket_vis_module import WebsocketVisModule logger = setup_logger("dimos.robot.unitree_webrtc.unitree_g1", level=logging.INFO) -try: - from dimos.hardware.camera.zed import ZEDModule -except ImportError: - logger.warning("ZEDModule not found. Please install pyzed to use ZED camera functionality.") - ZEDModule = None - # Suppress verbose loggers logging.getLogger("aiortc.codecs.h264").setLevel(logging.ERROR) logging.getLogger("lcm_foxglove_bridge").setLevel(logging.ERROR) @@ -122,6 +148,7 @@ def __init__( enable_joystick: bool = False, enable_connection: bool = True, enable_ros_bridge: bool = True, + enable_perception: bool = False, enable_camera: bool = False, ): """Initialize the G1 robot. 
@@ -136,7 +163,7 @@ def __init__( enable_joystick: Enable pygame joystick control enable_connection: Enable robot connection module enable_ros_bridge: Enable ROS bridge - enable_camera: Enable ZED camera module + enable_camera: Enable web camera module """ super().__init__() self.ip = ip @@ -146,6 +173,7 @@ def __init__( self.enable_joystick = enable_joystick self.enable_connection = enable_connection self.enable_ros_bridge = enable_ros_bridge + self.enable_perception = enable_perception self.enable_camera = enable_camera self.websocket_port = websocket_port self.lcm = LCM() @@ -165,28 +193,71 @@ def __init__( self.connection = None self.websocket_vis = None self.foxglove_bridge = None + self.spatial_memory_module = None self.joystick = None self.ros_bridge = None - self.zed_camera = None - + self.camera = None self._setup_directories() def _setup_directories(self): - """Setup output directories.""" + """Setup directories for spatial memory storage.""" os.makedirs(self.output_dir, exist_ok=True) logger.info(f"Robot outputs will be saved to: {self.output_dir}") + # Initialize memory directories + self.memory_dir = os.path.join(self.output_dir, "memory") + os.makedirs(self.memory_dir, exist_ok=True) + + # Initialize spatial memory properties + self.spatial_memory_dir = os.path.join(self.memory_dir, "spatial_memory") + self.spatial_memory_collection = "spatial_memory" + self.db_path = os.path.join(self.spatial_memory_dir, "chromadb_data") + self.visual_memory_path = os.path.join(self.spatial_memory_dir, "visual_memory.pkl") + + # Create spatial memory directories + os.makedirs(self.spatial_memory_dir, exist_ok=True) + os.makedirs(self.db_path, exist_ok=True) + + def _deploy_detection(self, goto): + detection = self.dimos.deploy( + ObjectDBModule, goto=goto, camera_info=zed.CameraInfo.SingleWebcam + ) + + detection.image.connect(self.camera.image) + detection.pointcloud.transport = core.LCMTransport("/map", PointCloud2) + + detection.annotations.transport = 
core.LCMTransport("/annotations", ImageAnnotations) + detection.detections.transport = core.LCMTransport("/detections", Detection2DArray) + + detection.scene_update.transport = core.LCMTransport("/scene_update", SceneUpdate) + detection.target.transport = core.LCMTransport("/target", PoseStamped) + detection.detected_pointcloud_0.transport = core.LCMTransport( + "/detected/pointcloud/0", PointCloud2 + ) + detection.detected_pointcloud_1.transport = core.LCMTransport( + "/detected/pointcloud/1", PointCloud2 + ) + detection.detected_pointcloud_2.transport = core.LCMTransport( + "/detected/pointcloud/2", PointCloud2 + ) + + detection.detected_image_0.transport = core.LCMTransport("/detected/image/0", Image) + detection.detected_image_1.transport = core.LCMTransport("/detected/image/1", Image) + detection.detected_image_2.transport = core.LCMTransport("/detected/image/2", Image) + + self.detection = detection + def start(self): """Start the robot system with all modules.""" - self.dimos = core.start(4) # 2 workers for connection and visualization + self.dimos = core.start(8) # 2 workers for connection and visualization if self.enable_connection: self._deploy_connection() self._deploy_visualization() - if self.enable_camera: - self._deploy_camera() + if self.enable_perception: + self._deploy_perception() if self.enable_joystick: self._deploy_joystick() @@ -194,12 +265,66 @@ def start(self): if self.enable_ros_bridge: self._deploy_ros_bridge() - self._start_modules() + self.nav = self.dimos.deploy(NavigationModule) + self.nav.goal_reached.transport = core.LCMTransport("/goal_reached", Bool) + self.nav.goal_pose.transport = core.LCMTransport("/goal_pose", PoseStamped) + self.nav.cancel_goal.transport = core.LCMTransport("/cancel_goal", Bool) + self.nav.soft_stop.transport = core.LCMTransport("/stop", Int8) + self.nav.joy.transport = core.LCMTransport("/joy", Joy) + self.nav.start() + + self._deploy_camera() + self._deploy_detection(self.nav.go_to) self.lcm.start() + # 
Setup agent with G1 skills + logger.info("Setting up agent with G1 skills...") + + agent = Agent( + system_prompt="You are a helpful assistant controlling a Unitree G1 humanoid robot. You can control the robot's arms, movement modes, and navigation.", + model=Model.GPT_4O, + provider=Provider.OPENAI, + ) + + # Register G1-specific skill container + g1_skills = UnitreeG1SkillContainer(robot=self) + agent.register_skills(g1_skills) + + human_input = self.dimos.deploy(HumanInput) + agent.register_skills(human_input) + + if self.enable_perception: + agent.register_skills(self.detection) + + # Register ROS navigation + ros_nav = RosNavigation(self) + ros_nav.__enter__() + agent.register_skills(ros_nav) + + agent.run_implicit_skill("human") + agent.start() + + # For logging + skills = [tool.name for tool in agent.get_tools()] + logger.info(f"Agent configured with {len(skills)} skills: {', '.join(skills)}") + + agent.loop_thread() + logger.info("UnitreeG1 initialized and started") logger.info(f"WebSocket visualization available at http://localhost:{self.websocket_port}") + self._start_modules() + + def stop(self): + self.lcm.stop() + + def __enter__(self) -> "UnitreeG1": + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + return False def _deploy_connection(self): """Deploy and configure the connection module.""" @@ -211,42 +336,28 @@ def _deploy_connection(self): self.connection.odom_pose.transport = core.LCMTransport("/odom", PoseStamped) def _deploy_camera(self): - """Deploy and configure the ZED camera module (real or fake based on replay_path).""" - - if self.replay_path: - # Use FakeZEDModule for replay - from dimos.hardware.fake_zed_module import FakeZEDModule - - logger.info(f"Deploying FakeZEDModule for replay from: {self.replay_path}") - self.zed_camera = self.dimos.deploy( - FakeZEDModule, - recording_path=self.replay_path, - frame_id="zed_camera", - ) - else: - # Use real ZEDModule (with optional recording) - 
logger.info("Deploying ZED camera module...") - self.zed_camera = self.dimos.deploy( - ZEDModule, - camera_id=0, - resolution="HD720", - depth_mode="NEURAL", - fps=30, - enable_tracking=True, # Enable for G1 pose estimation - enable_imu_fusion=True, - set_floor_as_origin=True, - publish_rate=30.0, - frame_id="zed_camera", - recording_path=self.recording_path, # Pass recording path if provided - ) - - # Configure ZED LCM transports (same for both real and fake) - self.zed_camera.color_image.transport = core.LCMTransport("/zed/color_image", Image) - self.zed_camera.depth_image.transport = core.LCMTransport("/zed/depth_image", Image) - self.zed_camera.camera_info.transport = core.LCMTransport("/zed/camera_info", CameraInfo) - self.zed_camera.pose.transport = core.LCMTransport("/zed/pose", PoseStamped) + """Deploy and configure a standard webcam module.""" + logger.info("Deploying standard webcam module...") + + self.camera = self.dimos.deploy( + CameraModule, + transform=Transform( + translation=Vector3(0.05, 0.0, 0.0), + rotation=Quaternion.from_euler(Vector3(0.0, 0.2, 0.0)), + frame_id="sensor", + child_frame_id="camera_link", + ), + hardware=lambda: Webcam( + camera_index=0, + frequency=15, + stereo_slice="left", + camera_info=zed.CameraInfo.SingleWebcam, + ), + ) - logger.info("ZED camera module configured") + self.camera.image.transport = core.LCMTransport("/image", Image) + self.camera.camera_info.transport = core.LCMTransport("/camera_info", CameraInfo) + logger.info("Webcam module configured") def _deploy_visualization(self): """Deploy and configure visualization modules.""" @@ -259,6 +370,20 @@ def _deploy_visualization(self): # Deploy Foxglove bridge self.foxglove_bridge = FoxgloveBridge() + def _deploy_perception(self): + self.spatial_memory_module = self.dimos.deploy( + SpatialMemory, + collection_name=self.spatial_memory_collection, + db_path=self.db_path, + visual_memory_path=self.visual_memory_path, + output_dir=self.spatial_memory_dir, + ) + + 
self.spatial_memory_module.video.transport = core.LCMTransport("/image", Image) + self.spatial_memory_module.odom.transport = core.LCMTransport("/odom", PoseStamped) + + logger.info("Spatial memory module deployed and connected") + def _deploy_joystick(self): """Deploy joystick control module.""" from dimos.robot.unitree_webrtc.g1_joystick_module import G1JoystickModule @@ -287,9 +412,32 @@ def _deploy_ros_bridge(self): "/tf", TFMessage, ROSTFMessage, direction=BridgeDirection.ROS_TO_DIMOS ) - # Add /registered_scan topic from ROS to DIMOS + self.ros_bridge.add_topic("/stop", Int8, ROSInt8, direction=BridgeDirection.DIMOS_TO_ROS) + + from geometry_msgs.msg import PoseStamped as ROSPoseStamped + from std_msgs.msg import Bool as ROSBool + + from dimos.msgs.std_msgs import Bool + + # Navigation control topics from autonomy stack + self.ros_bridge.add_topic( + "/goal_pose", PoseStamped, ROSPoseStamped, direction=BridgeDirection.DIMOS_TO_ROS + ) + self.ros_bridge.add_topic( + "/cancel_goal", Bool, ROSBool, direction=BridgeDirection.DIMOS_TO_ROS + ) + self.ros_bridge.add_topic( + "/goal_reached", Bool, ROSBool, direction=BridgeDirection.ROS_TO_DIMOS + ) + + self.ros_bridge.add_topic("/joy", Joy, ROSJoy, direction=BridgeDirection.DIMOS_TO_ROS) + self.ros_bridge.add_topic( - "/registered_scan", PointCloud2, ROSPointCloud2, direction=BridgeDirection.ROS_TO_DIMOS + "/registered_scan", + PointCloud2, + ROSPointCloud2, + direction=BridgeDirection.ROS_TO_DIMOS, + remap_topic="/map", ) logger.info( @@ -300,11 +448,17 @@ def _start_modules(self): """Start all deployed modules.""" if self.connection: self.connection.start() - self.websocket_vis.start() + # self.websocket_vis.start() self.foxglove_bridge.start() - if self.joystick: - self.joystick.start() + # if self.joystick: + # self.joystick.start() + + self.camera.start() + self.detection.start() + + if self.enable_perception: + self.spatial_memory_module.start() # Initialize skills after connection is established if 
self.skill_library is not None: @@ -346,11 +500,16 @@ def shutdown(self): logger.info("UnitreeG1 shutdown complete") + @property + def spatial_memory(self) -> Optional[SpatialMemory]: + return self.spatial_memory_module + def main(): """Main entry point for testing.""" - import os import argparse + import os + from dotenv import load_dotenv load_dotenv() @@ -358,7 +517,7 @@ def main(): parser = argparse.ArgumentParser(description="Unitree G1 Humanoid Robot Control") parser.add_argument("--ip", default=os.getenv("ROBOT_IP"), help="Robot IP address") parser.add_argument("--joystick", action="store_true", help="Enable pygame joystick control") - parser.add_argument("--camera", action="store_true", help="Enable ZED camera module") + parser.add_argument("--camera", action="store_true", help="Enable usb camera module") parser.add_argument("--output-dir", help="Output directory for logs/data") parser.add_argument("--record", help="Path to save recording") parser.add_argument("--replay", help="Path to replay recording from") @@ -376,9 +535,23 @@ def main(): enable_camera=args.camera, enable_connection=os.getenv("ROBOT_IP") is not None, enable_ros_bridge=True, + enable_perception=True, ) robot.start() + # time.sleep(7) + # print("Starting navigation...") + # print( + # robot.nav.go_to( + # PoseStamped( + # ts=time.time(), + # frame_id="map", + # position=Vector3(0.0, 0.0, 0.03), + # orientation=Quaternion(0, 0, 0, 1), + # ), + # timeout=10, + # ), + # ) try: if args.joystick: print("\n" + "=" * 50) diff --git a/dimos/robot/unitree_webrtc/unitree_g1_skill_container.py b/dimos/robot/unitree_webrtc/unitree_g1_skill_container.py new file mode 100644 index 0000000000..ff1bcfad1f --- /dev/null +++ b/dimos/robot/unitree_webrtc/unitree_g1_skill_container.py @@ -0,0 +1,225 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Unitree G1 skill container for the new agents2 framework. +Dynamically generates skills for G1 humanoid robot including arm controls and movement modes. +""" + +from __future__ import annotations + +import datetime +import time +from typing import TYPE_CHECKING, Optional, Union + +from dimos.msgs.geometry_msgs import Twist, TwistStamped, Vector3 +from dimos.protocol.skill.skill import skill +from dimos.protocol.skill.type import Reducer, Stream +from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer +from dimos.utils.logging_config import setup_logger + +if TYPE_CHECKING: + from dimos.robot.unitree_webrtc.unitree_g1 import UnitreeG1 + from dimos.robot.unitree_webrtc.unitree_go2 import UnitreeGo2 + +logger = setup_logger("dimos.robot.unitree_webrtc.unitree_g1_skill_container") + +# G1 Arm Actions - all use api_id 7106 on topic "rt/api/arm/request" +G1_ARM_CONTROLS = [ + ("Handshake", 27, "Perform a handshake gesture with the right hand."), + ("HighFive", 18, "Give a high five with the right hand."), + ("Hug", 19, "Perform a hugging gesture with both arms."), + ("HighWave", 26, "Wave with the hand raised high."), + ("Clap", 17, "Clap hands together."), + ("FaceWave", 25, "Wave near the face level."), + ("LeftKiss", 12, "Blow a kiss with the left hand."), + ("ArmHeart", 20, "Make a heart shape with both arms overhead."), + ("RightHeart", 21, "Make a heart gesture with the right hand."), + ("HandsUp", 15, "Raise both hands up in the air."), + ("XRay", 24, "Hold arms in an X-ray pose position."), + ("RightHandUp", 23, "Raise 
only the right hand up."), + ("Reject", 22, "Make a rejection or 'no' gesture."), + ("CancelAction", 99, "Cancel any current arm action and return hands to neutral position."), +] + +# G1 Movement Modes - all use api_id 7101 on topic "rt/api/sport/request" +G1_MODE_CONTROLS = [ + ("WalkMode", 500, "Switch to normal walking mode."), + ("WalkControlWaist", 501, "Switch to walking mode with waist control."), + ("RunMode", 801, "Switch to running mode."), +] + + +class UnitreeG1SkillContainer(UnitreeSkillContainer): + """Container for Unitree G1 humanoid robot skills. + + Inherits all Go2 skills and adds G1-specific arm controls and movement modes. + """ + + def __init__(self, robot: Optional[Union[UnitreeG1, UnitreeGo2]] = None): + """Initialize the skill container with robot reference. + + Args: + robot: The UnitreeG1 or UnitreeGo2 robot instance + """ + # Initialize parent class to get all base Unitree skills + super().__init__(robot) + + # Add G1-specific skills on top + self._generate_arm_skills() + self._generate_mode_skills() + + def _generate_arm_skills(self): + """Dynamically generate arm control skills from G1_ARM_CONTROLS list.""" + logger.info(f"Generating {len(G1_ARM_CONTROLS)} G1 arm control skills") + + for name, data_value, description in G1_ARM_CONTROLS: + skill_name = self._convert_to_snake_case(name) + self._create_arm_skill(skill_name, data_value, description, name) + + def _generate_mode_skills(self): + """Dynamically generate movement mode skills from G1_MODE_CONTROLS list.""" + logger.info(f"Generating {len(G1_MODE_CONTROLS)} G1 movement mode skills") + + for name, data_value, description in G1_MODE_CONTROLS: + skill_name = self._convert_to_snake_case(name) + self._create_mode_skill(skill_name, data_value, description, name) + + def _create_arm_skill( + self, skill_name: str, data_value: int, description: str, original_name: str + ): + """Create a dynamic arm control skill method with the @skill decorator. 
+ + Args: + skill_name: Snake_case name for the method + data_value: The arm action data value + description: Human-readable description + original_name: Original CamelCase name for display + """ + + def dynamic_skill_func(self) -> str: + """Dynamic arm skill function.""" + return self._execute_arm_command(data_value, original_name) + + # Set the function's metadata + dynamic_skill_func.__name__ = skill_name + dynamic_skill_func.__doc__ = description + + # Apply the @skill decorator + decorated_skill = skill()(dynamic_skill_func) + + # Bind the method to the instance + bound_method = decorated_skill.__get__(self, self.__class__) + + # Add it as an attribute + setattr(self, skill_name, bound_method) + + logger.debug(f"Generated arm skill: {skill_name} (data={data_value})") + + def _create_mode_skill( + self, skill_name: str, data_value: int, description: str, original_name: str + ): + """Create a dynamic movement mode skill method with the @skill decorator. + + Args: + skill_name: Snake_case name for the method + data_value: The mode data value + description: Human-readable description + original_name: Original CamelCase name for display + """ + + def dynamic_skill_func(self) -> str: + """Dynamic mode skill function.""" + return self._execute_mode_command(data_value, original_name) + + # Set the function's metadata + dynamic_skill_func.__name__ = skill_name + dynamic_skill_func.__doc__ = description + + # Apply the @skill decorator + decorated_skill = skill()(dynamic_skill_func) + + # Bind the method to the instance + bound_method = decorated_skill.__get__(self, self.__class__) + + # Add it as an attribute + setattr(self, skill_name, bound_method) + + logger.debug(f"Generated mode skill: {skill_name} (data={data_value})") + + # ========== Override Skills for G1 ========== + + @skill() + def move(self, x: float, y: float = 0.0, yaw: float = 0.0, duration: float = 0.0) -> str: + """Move the robot using direct velocity commands (G1 version with TwistStamped). 
+ + Args: + x: Forward velocity (m/s) + y: Left/right velocity (m/s) + yaw: Rotational velocity (rad/s) + duration: How long to move (seconds) + """ + if self._robot is None: + return "Error: Robot not connected" + + # G1 uses TwistStamped instead of Twist + twist_stamped = TwistStamped(linear=Vector3(x, y, 0), angular=Vector3(0, 0, yaw)) + self._robot.move(twist_stamped, duration=duration) + return f"Started moving with velocity=({x}, {y}, {yaw}) for {duration} seconds" + + # ========== Helper Methods ========== + + def _execute_arm_command(self, data_value: int, name: str) -> str: + """Execute an arm command through WebRTC interface. + + Args: + data_value: The arm action data value + name: Human-readable name of the command + """ + if self._robot is None: + return f"Error: Robot not connected (cannot execute {name})" + + try: + result = self._robot.connection.publish_request( + "rt/api/arm/request", {"api_id": 7106, "parameter": {"data": data_value}} + ) + message = f"G1 arm action {name} executed successfully (data={data_value})" + logger.info(message) + return message + except Exception as e: + error_msg = f"Failed to execute G1 arm action {name}: {e}" + logger.error(error_msg) + return error_msg + + def _execute_mode_command(self, data_value: int, name: str) -> str: + """Execute a movement mode command through WebRTC interface. 
+ + Args: + data_value: The mode data value + name: Human-readable name of the command + """ + if self._robot is None: + return f"Error: Robot not connected (cannot execute {name})" + + try: + result = self._robot.connection.publish_request( + "rt/api/sport/request", {"api_id": 7101, "parameter": {"data": data_value}} + ) + message = f"G1 mode {name} activated successfully (data={data_value})" + logger.info(message) + return message + except Exception as e: + error_msg = f"Failed to execute G1 mode {name}: {e}" + logger.error(error_msg) + return error_msg diff --git a/dimos/robot/unitree_webrtc/unitree_go2.py b/dimos/robot/unitree_webrtc/unitree_go2.py index 0194652900..7fc14ae41f 100644 --- a/dimos/robot/unitree_webrtc/unitree_go2.py +++ b/dimos/robot/unitree_webrtc/unitree_go2.py @@ -394,7 +394,7 @@ def _setup_directories(self): def start(self): """Start the robot system with all modules.""" - self.dimos = core.start(8) + self.dimos = core.start(8, memory_limit="8GiB") self._deploy_connection() self._deploy_mapping() diff --git a/dimos/types/test_timestamped.py b/dimos/types/test_timestamped.py index 327e97d68b..a4edfe233d 100644 --- a/dimos/types/test_timestamped.py +++ b/dimos/types/test_timestamped.py @@ -317,3 +317,247 @@ def process_video_frame(frame): f"Aligned pair: primary={primary.ts:.6f}, secondary={secondary.ts:.6f}, diff={diff:.6f}s" ) assert diff <= 0.05 + + +def test_timestamp_alignment_primary_first(): + """Test alignment when primary messages arrive before secondary messages.""" + from reactivex import Subject + + primary_subject = Subject() + secondary_subject = Subject() + + results = [] + + # Set up alignment with a 2-second buffer + aligned = align_timestamped( + primary_subject, secondary_subject, buffer_size=2.0, match_tolerance=0.1 + ) + + # Subscribe to collect results + aligned.subscribe(lambda x: results.append(x)) + + # Send primary messages first + primary1 = SimpleTimestamped(1.0, "primary1") + primary2 = SimpleTimestamped(2.0, 
"primary2") + primary3 = SimpleTimestamped(3.0, "primary3") + + primary_subject.on_next(primary1) + primary_subject.on_next(primary2) + primary_subject.on_next(primary3) + + # At this point, no results should be emitted (no secondaries yet) + assert len(results) == 0 + + # Send secondary messages that match primary1 and primary2 + secondary1 = SimpleTimestamped(1.05, "secondary1") # Matches primary1 + secondary2 = SimpleTimestamped(2.02, "secondary2") # Matches primary2 + + secondary_subject.on_next(secondary1) + assert len(results) == 1 # primary1 should now be matched + assert results[0][0].data == "primary1" + assert results[0][1].data == "secondary1" + + secondary_subject.on_next(secondary2) + assert len(results) == 2 # primary2 should now be matched + assert results[1][0].data == "primary2" + assert results[1][1].data == "secondary2" + + # Send a secondary that's too far from primary3 + secondary_far = SimpleTimestamped(3.5, "secondary_far") # Too far from primary3 + secondary_subject.on_next(secondary_far) + # At this point primary3 is removed as unmatchable since secondary progressed past it + assert len(results) == 2 # primary3 should not match (outside tolerance) + + # Send a new primary that can match with the future secondary + primary4 = SimpleTimestamped(3.45, "primary4") + primary_subject.on_next(primary4) + assert len(results) == 3 # Should match with secondary_far + assert results[2][0].data == "primary4" + assert results[2][1].data == "secondary_far" + + # Complete the streams + primary_subject.on_completed() + secondary_subject.on_completed() + + +def test_timestamp_alignment_multiple_secondaries(): + """Test alignment with multiple secondary observables.""" + from reactivex import Subject + + primary_subject = Subject() + secondary1_subject = Subject() + secondary2_subject = Subject() + + results = [] + + # Set up alignment with two secondary streams + aligned = align_timestamped( + primary_subject, + secondary1_subject, + secondary2_subject, + 
buffer_size=1.0, + match_tolerance=0.05, + ) + + # Subscribe to collect results + aligned.subscribe(lambda x: results.append(x)) + + # Send a primary message + primary1 = SimpleTimestamped(1.0, "primary1") + primary_subject.on_next(primary1) + + # No results yet (waiting for both secondaries) + assert len(results) == 0 + + # Send first secondary + sec1_msg1 = SimpleTimestamped(1.01, "sec1_msg1") + secondary1_subject.on_next(sec1_msg1) + + # Still no results (waiting for secondary2) + assert len(results) == 0 + + # Send second secondary + sec2_msg1 = SimpleTimestamped(1.02, "sec2_msg1") + secondary2_subject.on_next(sec2_msg1) + + # Now we should have a result + assert len(results) == 1 + assert results[0][0].data == "primary1" + assert results[0][1].data == "sec1_msg1" + assert results[0][2].data == "sec2_msg1" + + # Test partial match (one secondary missing) + primary2 = SimpleTimestamped(2.0, "primary2") + primary_subject.on_next(primary2) + + # Send only one secondary + sec1_msg2 = SimpleTimestamped(2.01, "sec1_msg2") + secondary1_subject.on_next(sec1_msg2) + + # No result yet + assert len(results) == 1 + + # Send a secondary2 that's too far + sec2_far = SimpleTimestamped(2.1, "sec2_far") # Outside tolerance + secondary2_subject.on_next(sec2_far) + + # Still no result (secondary2 is outside tolerance) + assert len(results) == 1 + + # Complete the streams + primary_subject.on_completed() + secondary1_subject.on_completed() + secondary2_subject.on_completed() + + +def test_timestamp_alignment_delayed_secondary(): + """Test alignment when secondary messages arrive late but still within tolerance.""" + from reactivex import Subject + + primary_subject = Subject() + secondary_subject = Subject() + + results = [] + + # Set up alignment with a 2-second buffer + aligned = align_timestamped( + primary_subject, secondary_subject, buffer_size=2.0, match_tolerance=0.1 + ) + + # Subscribe to collect results + aligned.subscribe(lambda x: results.append(x)) + + # Send primary 
messages + primary1 = SimpleTimestamped(1.0, "primary1") + primary2 = SimpleTimestamped(2.0, "primary2") + primary3 = SimpleTimestamped(3.0, "primary3") + + primary_subject.on_next(primary1) + primary_subject.on_next(primary2) + primary_subject.on_next(primary3) + + # No results yet + assert len(results) == 0 + + # Send delayed secondaries (in timestamp order) + secondary1 = SimpleTimestamped(1.05, "secondary1") # Matches primary1 + secondary_subject.on_next(secondary1) + assert len(results) == 1 # primary1 matched + assert results[0][0].data == "primary1" + assert results[0][1].data == "secondary1" + + secondary2 = SimpleTimestamped(2.02, "secondary2") # Matches primary2 + secondary_subject.on_next(secondary2) + assert len(results) == 2 # primary2 matched + assert results[1][0].data == "primary2" + assert results[1][1].data == "secondary2" + + # Now send a secondary that's past primary3's match window + secondary_future = SimpleTimestamped(3.2, "secondary_future") # Too far from primary3 + secondary_subject.on_next(secondary_future) + # At this point, primary3 should be removed as unmatchable + assert len(results) == 2 # No new matches + + # Send a new primary that can match with secondary_future + primary4 = SimpleTimestamped(3.15, "primary4") + primary_subject.on_next(primary4) + assert len(results) == 3 # Should match immediately + assert results[2][0].data == "primary4" + assert results[2][1].data == "secondary_future" + + # Complete the streams + primary_subject.on_completed() + secondary_subject.on_completed() + + +def test_timestamp_alignment_buffer_cleanup(): + """Test that old buffered primaries are cleaned up.""" + from reactivex import Subject + import time as time_module + + primary_subject = Subject() + secondary_subject = Subject() + + results = [] + + # Set up alignment with a 0.5-second buffer + aligned = align_timestamped( + primary_subject, secondary_subject, buffer_size=0.5, match_tolerance=0.05 + ) + + # Subscribe to collect results + 
aligned.subscribe(lambda x: results.append(x)) + + # Use real timestamps for this test + now = time_module.time() + + # Send an old primary + old_primary = Timestamped(now - 1.0) # 1 second ago + old_primary.data = "old" + primary_subject.on_next(old_primary) + + # Send a recent secondary to trigger cleanup + recent_secondary = Timestamped(now) + recent_secondary.data = "recent" + secondary_subject.on_next(recent_secondary) + + # Old primary should not match (outside buffer window) + assert len(results) == 0 + + # Send a matching pair within buffer + new_primary = Timestamped(now + 0.1) + new_primary.data = "new_primary" + new_secondary = Timestamped(now + 0.11) + new_secondary.data = "new_secondary" + + primary_subject.on_next(new_primary) + secondary_subject.on_next(new_secondary) + + # Should have one match + assert len(results) == 1 + assert results[0][0].data == "new_primary" + assert results[0][1].data == "new_secondary" + + # Complete the streams + primary_subject.on_completed() + secondary_subject.on_completed() diff --git a/dimos/types/test_weaklist.py b/dimos/types/test_weaklist.py new file mode 100644 index 0000000000..fd0b05d74f --- /dev/null +++ b/dimos/types/test_weaklist.py @@ -0,0 +1,163 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for WeakList implementation.""" + +import gc +import pytest +from dimos.types.weaklist import WeakList + + +class TestObject: + """Simple test object.""" + + def __init__(self, value): + self.value = value + + def __repr__(self): + return f"TestObject({self.value})" + + +def test_weaklist_basic_operations(): + """Test basic append, iterate, and length operations.""" + wl = WeakList() + + # Add objects + obj1 = TestObject(1) + obj2 = TestObject(2) + obj3 = TestObject(3) + + wl.append(obj1) + wl.append(obj2) + wl.append(obj3) + + # Check length and iteration + assert len(wl) == 3 + assert list(wl) == [obj1, obj2, obj3] + + # Check contains + assert obj1 in wl + assert obj2 in wl + assert TestObject(4) not in wl + + +def test_weaklist_auto_removal(): + """Test that objects are automatically removed when garbage collected.""" + wl = WeakList() + + obj1 = TestObject(1) + obj2 = TestObject(2) + obj3 = TestObject(3) + + wl.append(obj1) + wl.append(obj2) + wl.append(obj3) + + assert len(wl) == 3 + + # Delete one object and force garbage collection + del obj2 + gc.collect() + + # Should only have 2 objects now + assert len(wl) == 2 + assert list(wl) == [obj1, obj3] + + +def test_weaklist_explicit_remove(): + """Test explicit removal of objects.""" + wl = WeakList() + + obj1 = TestObject(1) + obj2 = TestObject(2) + + wl.append(obj1) + wl.append(obj2) + + # Remove obj1 + wl.remove(obj1) + assert len(wl) == 1 + assert obj1 not in wl + assert obj2 in wl + + # Try to remove non-existent object + with pytest.raises(ValueError): + wl.remove(TestObject(3)) + + +def test_weaklist_indexing(): + """Test index access.""" + wl = WeakList() + + obj1 = TestObject(1) + obj2 = TestObject(2) + obj3 = TestObject(3) + + wl.append(obj1) + wl.append(obj2) + wl.append(obj3) + + assert wl[0] is obj1 + assert wl[1] is obj2 + assert wl[2] is obj3 + + # Test index out of range + with pytest.raises(IndexError): + _ = wl[3] + + +def test_weaklist_clear(): + """Test clearing the list.""" + 
wl = WeakList() + + obj1 = TestObject(1) + obj2 = TestObject(2) + + wl.append(obj1) + wl.append(obj2) + + assert len(wl) == 2 + + wl.clear() + assert len(wl) == 0 + assert obj1 not in wl + + +def test_weaklist_iteration_during_modification(): + """Test that iteration works even if objects are deleted during iteration.""" + wl = WeakList() + + objects = [TestObject(i) for i in range(5)] + for obj in objects: + wl.append(obj) + + # Verify initial state + assert len(wl) == 5 + + # Iterate and check that we can safely delete objects + seen_values = [] + for obj in wl: + seen_values.append(obj.value) + if obj.value == 2: + # Delete another object (not the current one) + del objects[3] # Delete TestObject(3) + gc.collect() + + # The object with value 3 gets garbage collected during iteration + # so we might not see it (depends on timing) + assert len(seen_values) in [4, 5] + assert all(v in [0, 1, 2, 3, 4] for v in seen_values) + + # After iteration, the list should have 4 objects (one was deleted) + assert len(wl) == 4 diff --git a/dimos/types/timestamped.py b/dimos/types/timestamped.py index 36f86b2ebb..c1e3bae4df 100644 --- a/dimos/types/timestamped.py +++ b/dimos/types/timestamped.py @@ -11,16 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +from collections import defaultdict from datetime import datetime, timezone -from typing import Generic, Iterable, Optional, Tuple, TypedDict, TypeVar, Union +from typing import Generic, Iterable, List, Optional, Tuple, TypeVar, Union from dimos_lcm.builtin_interfaces import Time as ROSTime +from reactivex import create # from dimos_lcm.std_msgs import Time as ROSTime from reactivex.observable import Observable from sortedcontainers import SortedKeyList +from dimos.types.weaklist import WeakList +from dimos.utils.logging_config import setup_logger + +logger = setup_logger("dimos.timestampAlignment") + # any class that carries a timestamp should inherit from this # this allows us to work with timeseries in consistent way, allign messages, replay etc # aditional functionality will come to this class soon @@ -243,90 +249,157 @@ def _prune_old_messages(self, current_ts: float) -> None: if keep_idx > 0: del self._items[:keep_idx] + def remove_by_timestamp(self, timestamp: float) -> bool: + """Remove an item with the given timestamp. Returns True if item was found and removed.""" + idx = self._items.bisect_key_left(timestamp) -def align_timestamped( - primary_observable: Observable[PRIMARY], - secondary_observable: Observable[SECONDARY], - buffer_size: float = 1.0, # seconds - match_tolerance: float = 0.05, # seconds -) -> Observable[Tuple[PRIMARY, SECONDARY]]: - from reactivex import create + if idx < len(self._items) and self._items[idx].ts == timestamp: + del self._items[idx] + return True + return False - def subscribe(observer, scheduler=None): - secondary_collection: TimestampedBufferCollection[SECONDARY] = TimestampedBufferCollection( - buffer_size - ) - # Subscribe to secondary to populate the buffer - secondary_sub = secondary_observable.subscribe(secondary_collection.add) + def remove(self, item: T) -> bool: + """Remove a timestamped item from the collection. 
Returns True if item was found and removed.""" + return self.remove_by_timestamp(item.ts) - def on_primary(primary_item: PRIMARY): - secondary_item = secondary_collection.find_closest( - primary_item.ts, tolerance=match_tolerance - ) - if secondary_item is not None: - observer.on_next((primary_item, secondary_item)) - # Subscribe to primary and emit aligned pairs - primary_sub = primary_observable.subscribe( - on_next=on_primary, on_error=observer.on_error, on_completed=observer.on_completed - ) +class MatchContainer(Timestamped, Generic[PRIMARY, SECONDARY]): + """ + This class stores a primary item along with its partial matches to secondary items, + tracking which secondaries are still missing to avoid redundant searches. + """ - # Return cleanup function - def dispose(): - secondary_sub.dispose() - primary_sub.dispose() + def __init__(self, primary: PRIMARY, matches: List[Optional[SECONDARY]]): + super().__init__(primary.ts) + self.primary = primary + self.matches = matches # Direct list with None for missing matches - return dispose + def message_received(self, secondary_idx: int, secondary_item: SECONDARY): + """Process a secondary message and check if it matches this primary.""" + if self.matches[secondary_idx] is None: + self.matches[secondary_idx] = secondary_item - return create(subscribe) + def is_complete(self) -> bool: + """Check if all secondary matches have been found.""" + return all(match is not None for match in self.matches) + + def get_tuple(self) -> Tuple[PRIMARY, ...]: + """Get the result tuple for emission.""" + return (self.primary, *self.matches) -def align_timestamped_multiple( +def align_timestamped( primary_observable: Observable[PRIMARY], *secondary_observables: Observable[SECONDARY], buffer_size: float = 1.0, # seconds - match_tolerance: float = 0.05, # seconds + match_tolerance: float = 0.1, # seconds ) -> Observable[Tuple[PRIMARY, ...]]: - """Align a primary observable with multiple secondary observables. 
+ """Align a primary observable with one or more secondary observables. Args: primary_observable: The primary stream to align against - *secondary_observables: Secondary streams to align - buffer_size: Time window to keep secondary messages in seconds + *secondary_observables: One or more secondary streams to align + buffer_size: Time window to keep messages in seconds match_tolerance: Maximum time difference for matching in seconds Returns: - Observable that emits tuples of (primary_item, secondary1, secondary2, ...) - where each secondary item is the closest match from the corresponding + If single secondary observable: Observable that emits tuples of (primary_item, secondary_item) + If multiple secondary observables: Observable that emits tuples of (primary_item, secondary1, secondary2, ...) + Each secondary item is the closest match from the corresponding secondary observable, or None if no match within tolerance. """ - from reactivex import create def subscribe(observer, scheduler=None): - # Create a buffer collection for each secondary observable - secondary_collections: list[TimestampedBufferCollection[SECONDARY]] = [ + # Create a timed buffer collection for each secondary observable + secondary_collections: List[TimestampedBufferCollection[SECONDARY]] = [ TimestampedBufferCollection(buffer_size) for _ in secondary_observables ] + # WeakLists to track subscribers to each secondary observable + secondary_stakeholders = defaultdict(WeakList) + + # Buffer for unmatched MatchContainers - automatically expires old items + primary_buffer: TimestampedBufferCollection[MatchContainer[PRIMARY, SECONDARY]] = ( + TimestampedBufferCollection(buffer_size) + ) + # Subscribe to all secondary observables secondary_subs = [] - for i, secondary_obs in enumerate(secondary_observables): - sub = secondary_obs.subscribe(secondary_collections[i].add) - secondary_subs.append(sub) - def on_primary(primary_item: PRIMARY): - # Find closest match from each secondary collection - 
secondary_items = [] - for collection in secondary_collections: - secondary_item = collection.find_closest(primary_item.ts, tolerance=match_tolerance) - secondary_items.append(secondary_item) + def has_secondary_progressed_past(secondary_ts: float, primary_ts: float) -> bool: + """Check if secondary stream has progressed past the primary + tolerance.""" + return secondary_ts > primary_ts + match_tolerance - # Emit the aligned tuple (flatten into single tuple) - observer.on_next((primary_item, *secondary_items)) + def remove_stakeholder(stakeholder: MatchContainer): + """Remove a stakeholder from all tracking structures.""" + primary_buffer.remove(stakeholder) + for weak_list in secondary_stakeholders.values(): + weak_list.discard(stakeholder) - # Subscribe to primary and emit aligned tuples + def on_secondary(i: int, secondary_item: SECONDARY): + # Add the secondary item to its collection + secondary_collections[i].add(secondary_item) + + # Check all stakeholders for this secondary stream + for stakeholder in secondary_stakeholders[i]: + # If the secondary stream has progressed past this primary, + # we won't be able to match it anymore + if has_secondary_progressed_past(secondary_item.ts, stakeholder.ts): + logger.debug(f"secondary progressed, giving up {stakeholder.ts}") + + remove_stakeholder(stakeholder) + continue + + # Check if this secondary is within tolerance of the primary + if abs(stakeholder.ts - secondary_item.ts) <= match_tolerance: + stakeholder.message_received(i, secondary_item) + + # If all secondaries matched, emit result + if stakeholder.is_complete(): + logger.debug(f"Emitting deferred match {stakeholder.ts}") + observer.on_next(stakeholder.get_tuple()) + remove_stakeholder(stakeholder) + + for i, secondary_obs in enumerate(secondary_observables): + secondary_subs.append( + secondary_obs.subscribe( + lambda x, idx=i: on_secondary(idx, x), on_error=observer.on_error + ) + ) + + def on_primary(primary_item: PRIMARY): + # Try to find matches in 
existing secondary collections + matches = [None] * len(secondary_observables) + + for i, collection in enumerate(secondary_collections): + closest = collection.find_closest(primary_item.ts, tolerance=match_tolerance) + if closest is not None: + matches[i] = closest + else: + # Check if this secondary stream has already progressed past this primary + if collection.end_ts is not None and has_secondary_progressed_past( + collection.end_ts, primary_item.ts + ): + # This secondary won't match, so don't buffer this primary + return + + # If all matched, emit immediately without creating MatchContainer + if all(match is not None for match in matches): + logger.debug(f"Immediate match {primary_item.ts}") + result = (primary_item, *matches) + observer.on_next(result) + else: + logger.debug(f"Deferred match attempt {primary_item.ts}") + match_container = MatchContainer(primary_item, matches) + primary_buffer.add(match_container) + + for i, match in enumerate(matches): + if match is None: + secondary_stakeholders[i].append(match_container) + + # Subscribe to primary observable primary_sub = primary_observable.subscribe( - on_next=on_primary, on_error=observer.on_error, on_completed=observer.on_completed + on_primary, on_error=observer.on_error, on_completed=observer.on_completed ) # Return cleanup function diff --git a/dimos/types/weaklist.py b/dimos/types/weaklist.py new file mode 100644 index 0000000000..8722455c66 --- /dev/null +++ b/dimos/types/weaklist.py @@ -0,0 +1,85 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Weak reference list implementation that automatically removes dead references.""" + +import weakref +from typing import Any, Iterator, Optional + + +class WeakList: + """A list that holds weak references to objects. + + Objects are automatically removed when garbage collected. + Supports iteration, append, remove, and length operations. + """ + + def __init__(self): + self._refs = [] + + def append(self, obj: Any) -> None: + """Add an object to the list (stored as weak reference).""" + + def _cleanup(ref): + try: + self._refs.remove(ref) + except ValueError: + pass + + self._refs.append(weakref.ref(obj, _cleanup)) + + def remove(self, obj: Any) -> None: + """Remove an object from the list.""" + for i, ref in enumerate(self._refs): + if ref() is obj: + del self._refs[i] + return + raise ValueError(f"{obj} not in WeakList") + + def discard(self, obj: Any) -> None: + """Remove an object from the list if present, otherwise do nothing.""" + try: + self.remove(obj) + except ValueError: + pass + + def __iter__(self) -> Iterator[Any]: + """Iterate over live objects, skipping dead references.""" + # Create a copy to avoid modification during iteration + for ref in self._refs[:]: + obj = ref() + if obj is not None: + yield obj + + def __len__(self) -> int: + """Return count of live objects.""" + return sum(1 for _ in self) + + def __contains__(self, obj: Any) -> bool: + """Check if object is in the list.""" + return any(ref() is obj for ref in self._refs) + + def clear(self) -> None: + """Remove all references.""" + self._refs.clear() + + def __getitem__(self, index: int) -> Any: + """Get object at index (only counting live objects).""" + for i, obj in enumerate(self): + if i == index: + return obj + raise IndexError("WeakList index out of range") + + def __repr__(self) -> str: + return f"WeakList({list(self)})" diff --git a/dimos/utils/cli/recorder/__init__.py 
b/dimos/utils/cli/recorder/__init__.py new file mode 100644 index 0000000000..f3944cec83 --- /dev/null +++ b/dimos/utils/cli/recorder/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/dimos/utils/cli/recorder/recorder.py b/dimos/utils/cli/recorder/recorder.py new file mode 100644 index 0000000000..641d1a5102 --- /dev/null +++ b/dimos/utils/cli/recorder/recorder.py @@ -0,0 +1,142 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import time +from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional, Set + +import lcm + +from dimos.protocol.service.lcmservice import LCMConfig, LCMService +from dimos.utils.cli.lcmspy.lcmspy import Topic +from dimos.utils.data import _get_data_dir +from dimos.utils.testing import TimedSensorStorage + + +class TopicInfo(Topic): + """Extended topic info with selection state.""" + + def __init__(self, name: str, history_window: float = 60.0): + super().__init__(name, history_window) + self.selected: bool = False + + +@dataclass +class RecorderConfig(LCMConfig): + """Configuration for the LCM recorder.""" + + topic_history_window: float = 60.0 + recording_base_dir: str = "recordings" + + +class RecorderService(LCMService): + """Service for recording selected LCM topics.""" + + default_config = RecorderConfig + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.topics: Dict[str, TopicInfo] = {} + self.l = lcm.LCM(self.config.url) if self.config.url else lcm.LCM() + self.recording = False + self.recording_start_time: Optional[float] = None + self.recording_dir: Optional[Path] = None + self.storages: Dict[str, TimedSensorStorage] = {} + + def start(self): + """Start the recorder service and topic discovery.""" + super().start() + # Subscribe to all topics for discovery + self.l.subscribe(".*", self._handle_message) + + def stop(self): + """Stop the recorder service.""" + if self.recording: + self.stop_recording() + super().stop() + + def _handle_message(self, topic: str, data: bytes): + """Handle incoming LCM messages.""" + # Track topic if not already known + if topic not in self.topics: + self.topics[topic] = TopicInfo( + name=topic, history_window=self.config.topic_history_window + ) + + # Update topic stats + self.topics[topic].msg(data) + + # If recording and topic is selected, save the message + if self.recording and self.topics[topic].selected and topic in 
self.storages: + self.storages[topic].save_one(data) + + def get_selected_topics(self) -> List[str]: + """Get list of selected topic names.""" + return [name for name, info in self.topics.items() if info.selected] + + def toggle_topic_selection(self, topic_name: str): + """Toggle selection state of a topic.""" + if topic_name in self.topics: + self.topics[topic_name].selected = not self.topics[topic_name].selected + + def select_all_topics(self, select: bool = True): + """Select or deselect all topics.""" + for topic in self.topics.values(): + topic.selected = select + + def start_recording(self) -> bool: + """Start recording selected topics.""" + selected_topics = self.get_selected_topics() + if not selected_topics: + return False + + # Create recording directory with timestamp + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + self.recording_dir = _get_data_dir( + f"{self.config.recording_base_dir}/recording_{timestamp}" + ) + self.recording_dir.mkdir(parents=True, exist_ok=True) + + # Create storage for each selected topic + self.storages.clear() + for topic_name in selected_topics: + # Clean topic name for filesystem (replace / with _) + safe_name = topic_name.replace("/", "_").strip("_") + storage_path = f"{self.config.recording_base_dir}/recording_{timestamp}/{safe_name}" + self.storages[topic_name] = TimedSensorStorage(storage_path) + + self.recording = True + self.recording_start_time = time.time() + return True + + def stop_recording(self) -> Optional[Path]: + """Stop recording and return the recording directory path.""" + if not self.recording: + return None + + self.recording = False + self.recording_start_time = None + recording_dir = self.recording_dir + self.recording_dir = None + self.storages.clear() + + return recording_dir + + def get_recording_duration(self) -> float: + """Get current recording duration in seconds.""" + if self.recording and self.recording_start_time: + return time.time() - self.recording_start_time + return 0.0 diff --git 
a/dimos/utils/cli/recorder/run_recorder.py b/dimos/utils/cli/recorder/run_recorder.py new file mode 100644 index 0000000000..3aa9920789 --- /dev/null +++ b/dimos/utils/cli/recorder/run_recorder.py @@ -0,0 +1,275 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import threading +from typing import List + +from rich.text import Text +from textual.app import App, ComposeResult +from textual.binding import Binding +from textual.containers import Container, Horizontal, Vertical +from textual.coordinate import Coordinate +from textual.reactive import reactive +from textual.widgets import DataTable, Footer, Header, Label, Static + +from dimos.utils.cli.recorder.recorder import RecorderService, TopicInfo + + +def gradient(max_value: float, value: float) -> str: + """Get gradient color based on value.""" + from textual.color import Color + + ratio = min(value / max_value, 1.0) + green = Color(0, 255, 0) + red = Color(255, 0, 0) + color = green.blend(red, ratio) + return color.hex + + +def topic_text(topic_name: str) -> Text: + """Format topic name with colors.""" + if "#" in topic_name: + parts = topic_name.split("#", 1) + return Text(parts[0], style="white") + Text("#" + parts[1], style="blue") + + if topic_name[:4] == "/rpc": + return Text(topic_name[:4], style="red") + Text(topic_name[4:], style="white") + + return Text(topic_name, style="white") + + +def format_duration(duration: float) -> str: + 
"""Format duration in human readable format.""" + hours = int(duration // 3600) + minutes = int((duration % 3600) // 60) + seconds = int(duration % 60) + + if hours > 0: + return f"{hours:02d}:{minutes:02d}:{seconds:02d}" + else: + return f"{minutes:02d}:{seconds:02d}" + + +class LCMRecorderApp(App): + """Interactive LCM topic recorder using Textual.""" + + CSS = """ + Screen { + layout: vertical; + } + + #status-bar { + height: 3; + background: $surface; + border: solid $primary; + padding: 0 1; + } + + #status-text { + padding: 0; + margin: 0; + } + + DataTable { + height: 1fr; + width: 1fr; + border: none; + background: black; + } + + Footer { + dock: bottom; + } + """ + + refresh_interval: float = 0.5 + show_command_palette = reactive(True) + + BINDINGS = [ + Binding("q", "quit", "Quit"), + Binding("ctrl+c", "quit", "Quit"), + Binding("r", "toggle_record", "Record", priority=True), + Binding("space", "toggle_selection", "Select", priority=True), + Binding("a", "select_all", "Select All", priority=True), + Binding("n", "select_none", "Select None", priority=True), + Binding("up", "cursor_up", "Up", show=False), + Binding("down", "cursor_down", "Down", show=False), + ] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.recorder = RecorderService(autoconf=True) + self.table: DataTable | None = None + self.status_label: Label | None = None + self._recording_path: str = "" + + def compose(self) -> ComposeResult: + # Status bar + with Container(id="status-bar"): + self.status_label = Label("Status: Idle", id="status-text") + yield self.status_label + + # Topic table + self.table = DataTable(zebra_stripes=False, cursor_type="row") + self.table.add_column("", width=3) # No header for selected column + self.table.add_column("Topic") + self.table.add_column("Freq (Hz)", width=12) + self.table.add_column("Bandwidth", width=15) + self.table.add_column("Total Traffic", width=15) + + yield self.table + yield Footer() + + def on_mount(self): + 
"""Initialize the app when mounted.""" + self.theme = "flexoki" + self.recorder.start() + self.set_interval(self.refresh_interval, self.refresh_display) + + async def on_unmount(self): + """Clean up when unmounting.""" + self.recorder.stop() + + def refresh_display(self): + """Refresh the table and status display.""" + self.refresh_table() + self.refresh_status() + + def refresh_table(self): + """Update the topic table.""" + topics: List[TopicInfo] = list(self.recorder.topics.values()) + topics.sort(key=lambda t: t.total_traffic(), reverse=True) + + # Remember current cursor row index + current_row = None + if self.table.cursor_coordinate: + current_row = self.table.cursor_coordinate.row + + self.table.clear(columns=False) + + for t in topics: + freq = t.freq(5.0) + kbps = t.kbps(5.0) + bw_val, bw_unit = t.kbps_hr(5.0) + total_val, total_unit = t.total_traffic_hr() + + # Selection indicator + selected = Text("✓", style="green bold") if t.selected else Text(" ") + + self.table.add_row( + selected, + topic_text(t.name), + Text(f"{freq:.1f}", style=gradient(10, freq)), + Text(f"{bw_val} {bw_unit.value}/s", style=gradient(1024 * 3, kbps)), + Text(f"{total_val} {total_unit.value}"), + key=t.name, # Use topic name as row key for cursor tracking + ) + + # Restore cursor position if possible + if current_row is not None and current_row < len(topics): + self.table.move_cursor(row=current_row, column=0) + + def refresh_status(self): + """Update the status display.""" + if self.recorder.recording: + duration = self.recorder.get_recording_duration() + selected_count = len(self.recorder.get_selected_topics()) + status = f"Status: RECORDING ({selected_count} topics) - Duration: {format_duration(duration)}" + self.status_label.update(Text(status, style="red bold")) + + # Show recording path notification + if self._recording_path != str(self.recorder.recording_dir): + self._recording_path = str(self.recorder.recording_dir) + self.notify(f"Recording to: {self._recording_path}", 
severity="information") + else: + selected_count = len(self.recorder.get_selected_topics()) + total_count = len(self.recorder.topics) + status = f"Status: Idle - {selected_count}/{total_count} topics selected" + self.status_label.update(Text(status, style="green")) + + def action_toggle_selection(self): + """Toggle selection of current topic.""" + cursor_coord = self.table.cursor_coordinate + if cursor_coord is not None: + # Get the row key which is the topic name using coordinate_to_cell_key + try: + row_key, _ = self.table.coordinate_to_cell_key(cursor_coord) + if row_key is not None: + # The row_key is a RowKey object, get its value + topic_name = row_key.value if hasattr(row_key, "value") else str(row_key) + self.recorder.toggle_topic_selection(topic_name) + self.refresh_display() + except Exception as e: + self.notify(f"Error: {e}", severity="error") + + def action_select_all(self): + """Select all topics.""" + self.recorder.select_all_topics(True) + self.refresh_display() + + def action_select_none(self): + """Deselect all topics.""" + self.recorder.select_all_topics(False) + self.refresh_display() + + def action_toggle_record(self): + """Toggle recording state.""" + if self.recorder.recording: + # Stop recording + recording_dir = self.recorder.stop_recording() + if recording_dir: + self.notify( + f"Recording saved to: {recording_dir}", severity="information", timeout=5.0 + ) + else: + # Start recording + if self.recorder.start_recording(): + self.refresh_display() + else: + self.notify("No topics selected for recording!", severity="error") + + def action_cursor_up(self): + """Move cursor up.""" + cursor_coord = self.table.cursor_coordinate + if cursor_coord is not None and cursor_coord.row > 0: + self.table.move_cursor(row=cursor_coord.row - 1) + + def action_cursor_down(self): + """Move cursor down.""" + cursor_coord = self.table.cursor_coordinate + if cursor_coord is not None: + max_row = len(self.recorder.topics) - 1 + if cursor_coord.row < max_row: + 
self.table.move_cursor(row=cursor_coord.row + 1) + + +def main(): + """Entry point for the LCM recorder.""" + import sys + + if len(sys.argv) > 1 and sys.argv[1] == "web": + import os + from textual_serve.server import Server + + server = Server(f"python {os.path.abspath(__file__)}") + server.serve() + else: + app = LCMRecorderApp() + app.run() + + +if __name__ == "__main__": + main() diff --git a/dimos/utils/cli/recorder/test_play.py b/dimos/utils/cli/recorder/test_play.py new file mode 100644 index 0000000000..e84c5e6471 --- /dev/null +++ b/dimos/utils/cli/recorder/test_play.py @@ -0,0 +1,57 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import shutil +import time + +from dimos_lcm.sensor_msgs import PointCloud2 + +from dimos import core +from dimos.msgs.sensor_msgs import Image +from dimos.perception.detection2d.testing import get_moment, publish_moment +from dimos.robot.unitree_webrtc.type.lidar import LidarMessage +from dimos.robot.unitree_webrtc.type.odometry import Odometry +from dimos.utils.data import _get_data_dir +from dimos.utils.testing import TimedSensorReplay, TimedSensorStorage + + +@pytest.mark.tool +def test_publish(): + def start_recorder(): + recording_name = "test_play_recording" + record_data_dir = _get_data_dir(recording_name) + + if record_data_dir.exists(): + shutil.rmtree(record_data_dir) + + lidar_store: TimedSensorStorage = TimedSensorStorage(f"{recording_name}/lidar") + lidar_sub = core.LCMTransport("/lidar", PointCloud2) + lidar_store.consume_stream(lidar_sub.observable()) + + start_recorder() + + dir_name = "unitree_go2_lidar_corrected" + lidar_store = TimedSensorReplay(f"{dir_name}/lidar") + odom_store = TimedSensorReplay( + f"{dir_name}/odom", autocast=Odometry.from_msg + ) # don't worry about autocast, this particular recording requires it + video_store = TimedSensorReplay(f"{dir_name}/video") + + lidar_pub = core.LCMTransport("/lidar", PointCloud2) + odom_pub = core.LCMTransport("/odom", Odometry) + image_pub = core.LCMTransport("/image", Image) + + lidar_store.stream(duration=2.0).subscribe(lidar_pub.publish) + odom_store.stream(duration=2.0).subscribe(odom_pub.publish) + video_store.stream(duration=2.0).subscribe(image_pub.publish) diff --git a/dimos/utils/cli/recorder/test_recorder.py b/dimos/utils/cli/recorder/test_recorder.py new file mode 100644 index 0000000000..60bd429372 --- /dev/null +++ b/dimos/utils/cli/recorder/test_recorder.py @@ -0,0 +1,305 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import shutil +import time +from contextlib import contextmanager +from pathlib import Path + +import pytest + +from dimos import core +from dimos_lcm.sensor_msgs import Image +from dimos.utils.data import _get_data_dir + +from .recorder import RecorderService + + +@contextmanager +def temp_recording_dir(recording_name: str): + """Context manager for temporary recording directories that auto-delete on exit. + + Args: + recording_name: Name for the recording subdirectory within data/ + + Yields: + Path: Path to the temporary recording directory + + Example: + with temp_recording_dir("my_test_recording") as record_dir: + lidar_store = TimedSensorStorage(f"{recording_name}/lidar") + # ... do recording ... 
+ # Directory is automatically deleted after exiting the context + """ + record_dir = _get_data_dir(recording_name) + + # Clean up any existing directory + if record_dir.exists(): + shutil.rmtree(record_dir) + + try: + yield record_dir + finally: + # Clean up on exit + if record_dir.exists(): + shutil.rmtree(record_dir) + + +@pytest.mark.lcm +def test_recorder_service_basic(): + """Test basic recorder service functionality.""" + # Start recorder service + recorder = RecorderService(autoconf=True) + recorder.start() + + # Let it discover topics for a moment + time.sleep(0.5) + + # Should have discovered some topics (at least from this test) + # Note: might be empty if no other LCM publishers are running + initial_topic_count = len(recorder.topics) + + # Publish a test message + test_topic = "/test/recorder" + pub = core.LCMTransport(test_topic, Image) + # Create a simple test image with minimal data + test_image = Image() + test_image.width = 640 + test_image.height = 480 + test_image.encoding = "rgb8" + test_image.step = 640 * 3 # 3 bytes per pixel for RGB + test_image.data = bytes(10) # Just a small amount of data for testing + pub.publish(test_image) + + # Give it time to receive + time.sleep(0.1) + + # Should have discovered the test topic (with type suffix) + # Find the topic that starts with our test topic name + found_topics = [t for t in recorder.topics if t.startswith(test_topic)] + assert len(found_topics) > 0, f"Topic {test_topic} not found in {list(recorder.topics.keys())}" + + actual_topic_name = found_topics[0] + assert len(recorder.topics) >= initial_topic_count + 1 + + # Check topic stats + topic_info = recorder.topics[actual_topic_name] + assert topic_info.freq(1.0) > 0 + assert topic_info.total_traffic() > 0 + + recorder.stop() + + +@pytest.mark.lcm +def test_topic_selection(): + """Test topic selection functionality.""" + recorder = RecorderService(autoconf=True) + recorder.start() + + # Publish some test messages + topics = ["/test/topic1", 
"/test/topic2", "/test/topic3"] + publishers = [] + for topic in topics: + pub = core.LCMTransport(topic, Image) + img = Image() + img.width = 100 + img.height = 100 + img.encoding = "rgb8" + img.step = 100 * 3 + img.data = bytes(10) + pub.publish(img) + publishers.append(pub) + + time.sleep(0.1) + + # All topics should be unselected by default + # Find actual topic names (with type suffixes) + actual_topics = {} + for topic in topics: + found = [t for t in recorder.topics if t.startswith(topic)] + assert len(found) > 0, f"Topic {topic} not found" + actual_topics[topic] = found[0] + assert not recorder.topics[actual_topics[topic]].selected + + # Test individual selection + recorder.toggle_topic_selection(actual_topics[topics[0]]) + assert recorder.topics[actual_topics[topics[0]]].selected + assert not recorder.topics[actual_topics[topics[1]]].selected + + # Test select all + recorder.select_all_topics(True) + for topic in actual_topics.values(): + assert recorder.topics[topic].selected + + # Test deselect all + recorder.select_all_topics(False) + for topic in actual_topics.values(): + assert not recorder.topics[topic].selected + + # Test get selected topics + recorder.toggle_topic_selection(actual_topics[topics[1]]) + recorder.toggle_topic_selection(actual_topics[topics[2]]) + selected = recorder.get_selected_topics() + assert len(selected) == 2 + assert actual_topics[topics[1]] in selected + assert actual_topics[topics[2]] in selected + + recorder.stop() + + +@pytest.mark.lcm +def test_recording(): + """Test recording functionality.""" + # Clean up any existing test recordings + test_recording_dir = _get_data_dir("recordings") + if test_recording_dir.exists(): + shutil.rmtree(test_recording_dir) + + recorder = RecorderService(autoconf=True) + recorder.start() + + # Set up test topics + topic1 = "/test/record1" + topic2 = "/test/record2" + + pub1 = core.LCMTransport(topic1, Image) + pub2 = core.LCMTransport(topic2, Image) + + # Publish initial messages to discover 
topics + img1 = Image() + img1.width = 200 + img1.height = 200 + img1.encoding = "rgb8" + img1.step = 200 * 3 + img1.data = bytes(10) + pub1.publish(img1) + + img2 = Image() + img2.width = 300 + img2.height = 300 + img2.encoding = "rgb8" + img2.step = 300 * 3 + img2.data = bytes(10) + pub2.publish(img2) + time.sleep(0.1) + + # Find actual topic names and select topics for recording + actual_topic1 = [t for t in recorder.topics if t.startswith(topic1)][0] + actual_topic2 = [t for t in recorder.topics if t.startswith(topic2)][0] + recorder.toggle_topic_selection(actual_topic1) + recorder.toggle_topic_selection(actual_topic2) + + # Start recording + assert recorder.start_recording() + assert recorder.recording + assert recorder.recording_dir is not None + assert recorder.recording_dir.exists() + + # Publish more messages while recording + for i in range(5): + img1 = Image() + img1.width = 200 + img1.height = 200 + img1.encoding = "rgb8" + img1.step = 200 * 3 + img1.data = bytes([i] * 10) + pub1.publish(img1) + + img2 = Image() + img2.width = 300 + img2.height = 300 + img2.encoding = "rgb8" + img2.step = 300 * 3 + img2.data = bytes([i] * 10) + pub2.publish(img2) + time.sleep(0.1) + + # Check recording duration + duration = recorder.get_recording_duration() + assert duration > 0.5 # Should be at least 0.5 seconds + + # Stop recording + recording_dir = recorder.stop_recording() + assert recording_dir is not None + assert recording_dir.exists() + assert not recorder.recording + + # Check that files were created + # Topics should be saved with / replaced by _ and type suffix removed + # The actual directory names will include the type suffix + topic1_dir = recording_dir / actual_topic1.replace("/", "_").strip("_") + topic2_dir = recording_dir / actual_topic2.replace("/", "_").strip("_") + + assert topic1_dir.exists() + assert topic2_dir.exists() + + # Check that pickle files were created + topic1_files = list(topic1_dir.glob("*.pickle")) + topic2_files = 
list(topic2_dir.glob("*.pickle")) + + assert len(topic1_files) >= 5 # At least 5 messages recorded + assert len(topic2_files) >= 5 + + # Clean up + recorder.stop() + if test_recording_dir.exists(): + shutil.rmtree(test_recording_dir) + + +@pytest.mark.lcm +def test_recording_with_temp_dir(): + """Test recording using temp_recording_dir context manager.""" + with temp_recording_dir("test_temp_recording") as record_dir: + recorder = RecorderService(autoconf=True, recording_base_dir="test_temp_recording") + recorder.start() + + # Publish test message + test_topic = "/test/temp" + pub = core.LCMTransport(test_topic, Image) + img = Image() + img.width = 100 + img.height = 100 + img.encoding = "rgb8" + img.step = 100 * 3 + img.data = bytes(10) + pub.publish(img) + time.sleep(0.1) + + # Find actual topic name and select for recording + actual_topic = [t for t in recorder.topics if t.startswith(test_topic)][0] + recorder.toggle_topic_selection(actual_topic) + assert recorder.start_recording() + + # Record some messages + for i in range(3): + img = Image() + img.width = 100 + img.height = 100 + img.encoding = "rgb8" + img.step = 100 * 3 + img.data = bytes([i] * 10) + pub.publish(img) + time.sleep(0.1) + + recording_path = recorder.stop_recording() + assert recording_path is not None + assert recording_path.exists() + + # Directory should still exist inside context + assert record_dir.exists() + + recorder.stop() + + # Directory should be cleaned up after context + assert not record_dir.exists() diff --git a/dimos/utils/data.py b/dimos/utils/data.py index 62ef6da851..0a2656ca82 100644 --- a/dimos/utils/data.py +++ b/dimos/utils/data.py @@ -12,18 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import glob -import os -import pickle import subprocess import tarfile from functools import cache from pathlib import Path -from typing import Any, Callable, Generic, Iterator, Optional, Type, TypeVar, Union - -from reactivex import from_iterable, interval -from reactivex import operators as ops -from reactivex.observable import Observable +from typing import Optional, Union @cache @@ -38,7 +31,9 @@ def _get_repo_root() -> Path: @cache -def _get_data_dir() -> Path: +def _get_data_dir(extra_path: Optional[str] = None) -> Path: + if extra_path: + return _get_repo_root() / "data" / extra_path return _get_repo_root() / "data" diff --git a/dimos/utils/reactive.py b/dimos/utils/reactive.py index 3318eef2ec..8fa556e0d8 100644 --- a/dimos/utils/reactive.py +++ b/dimos/utils/reactive.py @@ -13,13 +13,13 @@ # limitations under the License. import threading -from typing import Optional, TypeVar, Generic, Any, Callable +from typing import Any, Callable, Generic, Optional, TypeVar import reactivex as rx from reactivex import operators as ops -from reactivex.scheduler import ThreadPoolScheduler from reactivex.disposable import Disposable from reactivex.observable import Observable +from reactivex.scheduler import ThreadPoolScheduler from rxpy_backpressure import BackPressure from dimos.utils.threadpool import get_scheduler @@ -169,3 +169,33 @@ def spyfun(x): return x return ops.map(spyfun) + + +def quality_barrier(quality_func: Callable[[T], float], target_frequency: float): + """ + RxPY pipe operator that selects the highest quality item within each time window. 
+ + Args: + quality_func: Function to compute quality score for each item + target_frequency: Output frequency in Hz (e.g., 1.0 for 1 item per second) + + Returns: + A pipe operator that can be used with .pipe() + """ + window_duration = 1.0 / target_frequency # Duration of each window in seconds + + def _quality_barrier(source: Observable[T]) -> Observable[T]: + return source.pipe( + # Create time-based windows + ops.window_with_time(window_duration), + # For each window, find the highest quality item + ops.flat_map( + lambda window: window.pipe( + ops.to_list(), + ops.map(lambda items: max(items, key=quality_func) if items else None), + ops.filter(lambda x: x is not None), + ) + ), + ) + + return _quality_barrier diff --git a/dimos/utils/test_reactive.py b/dimos/utils/test_reactive.py index 2823850e03..a6cecbd3c6 100644 --- a/dimos/utils/test_reactive.py +++ b/dimos/utils/test_reactive.py @@ -12,18 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import pytest import time +from typing import Any, Callable, TypeVar + import numpy as np +import pytest import reactivex as rx from reactivex import operators as ops -from typing import Callable, TypeVar, Any from reactivex.disposable import Disposable + from dimos.utils.reactive import ( backpressure, - getter_streaming, - getter_ondemand, callback_to_observable, + getter_ondemand, + getter_streaming, ) @@ -164,7 +166,7 @@ def test_getter_streaming_nonblocking(): 0.1, "nonblocking getter init shouldn't block", ) - min_time(getter, 0.2, "Expected for first value call to block if cache is empty") + min_time(getter, 0.1, "Expected for first value call to block if cache is empty") assert getter() == 0 time.sleep(0.5) diff --git a/dimos/utils/testing.py b/dimos/utils/testing.py index 8930b2f0e9..8b94578751 100644 --- a/dimos/utils/testing.py +++ b/dimos/utils/testing.py @@ -17,6 +17,7 @@ import os import pickle import re +import shutil import time from pathlib import Path from typing import Any, Callable, Generic, Iterator, Optional, Tuple, TypeVar, Union @@ -107,13 +108,12 @@ class SensorStorage(Generic[T]): - """Generic sensor data storage utility. + """Generic sensor data storage utility. + + Creates a directory in the test data directory and stores pickled sensor data. - Creates a directory in the test data directory and stores pickled sensor data. - - Args: - name: The name of the storage directory - autocast: Optional function that takes data and returns a processed result before storage. + Args: + name: The name of the storage directory + autocast: Optional function that takes data and returns a processed result before storage.
""" def __init__(self, name: str, autocast: Optional[Callable[[T], Any]] = None): @@ -136,6 +137,10 @@ def __init__(self, name: str, autocast: Optional[Callable[[T], Any]] = None): # Create the directory self.root_dir.mkdir(parents=True, exist_ok=True) + def consume_stream(self, observable: Observable[Union[T, Any]]) -> None: + """Consume an observable stream of sensor data without saving.""" + return observable.subscribe(self.save_one) + def save_stream(self, observable: Observable[Union[T, Any]]) -> Observable[int]: """Save an observable stream of sensor data to pickle files.""" return observable.pipe(ops.map(lambda frame: self.save_one(frame))) @@ -296,40 +301,58 @@ def stream( def _subscribe(observer, scheduler=None): from reactivex.disposable import CompositeDisposable, Disposable - scheduler = scheduler or TimeoutScheduler() # default thread-based + scheduler = scheduler or TimeoutScheduler() + disp = CompositeDisposable() iterator = self.iterate_ts( seek=seek, duration=duration, from_timestamp=from_timestamp, loop=loop ) + # Get first message try: - prev_ts, first_data = next(iterator) + first_ts, first_data = next(iterator) except StopIteration: observer.on_completed() return Disposable() - # Emit the first sample immediately + # Establish timing reference + start_local_time = time.time() + start_replay_time = first_ts + + # Emit first sample immediately observer.on_next(first_data) - disp = CompositeDisposable() + # Pre-load next message + try: + next_message = next(iterator) + except StopIteration: + observer.on_completed() + return disp + + def schedule_emission(message): + nonlocal next_message + ts, data = message - def emit_next(prev_timestamp): + # Pre-load the following message while we have time try: - ts, data = next(iterator) + next_message = next(iterator) except StopIteration: - observer.on_completed() - return + next_message = None - delay = max(0.0, ts - prev_timestamp) / speed + # Calculate absolute emission time + target_time = 
start_local_time + (ts - start_replay_time) / speed + delay = max(0.0, target_time - time.time()) - def _action(sc, _state=None): + def emit(): observer.on_next(data) - emit_next(ts) # schedule the following sample + if next_message is not None: + schedule_emission(next_message) + else: + observer.on_completed() - # Schedule the next emission relative to previous timestamp - disp.add(scheduler.schedule_relative(delay, _action)) + disp.add(scheduler.schedule_relative(delay, lambda sc, _: emit())) - emit_next(prev_ts) + schedule_emission(next_message) return disp diff --git a/pyproject.toml b/pyproject.toml index 7979293411..670f98a0b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,6 +111,7 @@ dependencies = [ [project.scripts] lcmspy = "dimos.utils.cli.lcmspy.run_lcmspy:main" +lcm-recorder = "dimos.utils.cli.recorder.run_recorder:main" foxglove-bridge = "dimos.utils.cli.foxglove_bridge.run_foxglove_bridge:main" skillspy = "dimos.utils.cli.skillspy.skillspy:main" agentspy = "dimos.utils.cli.agentspy.agentspy:main"