diff --git a/.gitignore b/.gitignore index c8e4eb199d..adc50a7ef6 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,5 @@ data/* !data/.lfs/ FastSAM-x.pt yolo11n.pt + +/thread_monitor_report.csv diff --git a/assets/foxglove_unitree_yolo.json b/assets/foxglove_unitree_yolo.json index e8156a2a05..ab53e4a71e 100644 --- a/assets/foxglove_unitree_yolo.json +++ b/assets/foxglove_unitree_yolo.json @@ -25,14 +25,13 @@ "color": "#248eff57" }, "ff758451-8c06-4419-a995-e93c825eb8be": { - "visible": true, + "visible": false, "frameLocked": true, "label": "Grid", "instanceId": "ff758451-8c06-4419-a995-e93c825eb8be", "layerId": "foxglove.Grid", "frameId": "base_link", - "size": 3, - "divisions": 3, + "divisions": 6, "lineWidth": 1.5, "color": "#24fff4ff", "position": [ @@ -45,18 +44,19 @@ 0, 0 ], - "order": 2 + "order": 2, + "size": 6 } }, "cameraState": { "perspective": true, - "distance": 72.10076346479195, - "phi": 58.649061347608615, - "thetaOffset": 141.2348270943936, + "distance": 13.268408624096915, + "phi": 26.658696672199024, + "thetaOffset": 99.69918626426482, "targetOffset": [ - 1.4329507220695694, - -1.682769241958899, - 0.3118341583211758 + 1.740213570345715, + 0.7318803628974015, + -1.5060700211358968 ], "target": [ 0, @@ -89,6 +89,15 @@ "transforms": { "frame:camera_link": { "visible": false + }, + "frame:sensor": { + "visible": false + }, + "frame:sensor_at_scan": { + "visible": false + }, + "frame:map": { + "visible": true } }, "topics": { @@ -99,28 +108,27 @@ "colorMode": "colormap", "colorMap": "turbo", "pointShape": "circle", - "pointSize": 10, - "explicitAlpha": 1, + "pointSize": 2, + "explicitAlpha": 0.8, "decayTime": 0, - "cubeSize": 0.1, - "minValue": -0.3, - "cubeOutline": false + "cubeSize": 0.05, + "cubeOutline": false, + "minValue": -2 }, "/odom": { - "visible": false, + "visible": true, "axisScale": 1 }, "/video": { "visible": false }, "/global_map": { - "visible": false, + "visible": true, "colorField": "z", "colorMode": "colormap", "colorMap": "turbo", - "pointSize": 10, "decayTime": 0, - "pointShape": "cube", + "pointShape": "square", "cubeOutline": false, "cubeSize": 0.08, "gradient": [ @@ -128,8 +136,9 @@ "#d1e2e2ff" ], "stixelsEnabled": false, - "explicitAlpha": 1, - "minValue": -0.2 + "explicitAlpha": 0.339, + "minValue": -0.2, + "pointSize": 5 }, "/global_path": { "visible": true, @@ -152,7 +161,7 @@ "visible": false }, "/global_costmap": { - "visible": true, + "visible": false, "maxColor": "#6b2b2bff", "frameLocked": false, "unknownColor": "#80808000", @@ -199,6 +208,197 @@ }, "/navigation_goal": { "visible": true + }, + "/debug_camera_optical_points": { + "stixelsEnabled": false, + "visible": false, + "pointSize": 0.07, + "pointShape": "cube", + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo" + }, + "/debug_world_points": { + "visible": false, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "rainbow", + "pointShape": "cube" + }, + "/filtered_points_suitcase_0": { + "visible": false, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "flatColor": "#ff0808ff", + "cubeSize": 0.149, + "pointSize": 28.57 + }, + "/filtered_points_combined": { + "visible": true, + "flatColor": "#ff0000ff", + "pointShape": "cube", + "pointSize": 6.63, + "colorField": "z", + "colorMode": "gradient", + "colorMap": "rainbow", + "cubeSize": 0.35, + "gradient": [ + "#d100caff", + "#ff0000ff" + ] + }, + "/filtered_points_box_7": { + "visible": true, + "flatColor": "#fbfaffff", + "colorField": "intensity", + 
"colorMode": "colormap", + "colorMap": "turbo" + }, + "/filtered_pointcloud": { + "visible": true, + "colorField": "z", + "colorMode": "flat", + "colorMap": "turbo", + "flatColor": "#ff0000ff", + "pointSize": 40.21, + "pointShape": "cube", + "cubeSize": 0.1, + "cubeOutline": true + }, + "/detected": { + "visible": false, + "pointSize": 1.5, + "pointShape": "cube", + "cubeSize": 0.118, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "flatColor": "#d70000ff", + "cubeOutline": true + }, + "/detected_0": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointSize": 1.6, + "pointShape": "cube", + "cubeSize": 0.1, + "flatColor": "#e00000ff", + "stixelsEnabled": false, + "decayTime": 0, + "cubeOutline": true + }, + "/detected_1": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "cubeSize": 0.1, + "flatColor": "#00ff15ff", + "cubeOutline": true + }, + "/image_detected_0": { + "visible": false + }, + "/detected/pointcloud/1": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "flatColor": "#15ff00ff", + "pointSize": 0.1, + "cubeSize": 0.05, + "cubeOutline": true + }, + "/detected/pointcloud/2": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "flatColor": "#00ffe1ff", + "pointSize": 10, + "cubeOutline": true, + "cubeSize": 0.05 + }, + "/detected/pointcloud/0": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "flatColor": "#ff0000ff", + "cubeOutline": true, + "cubeSize": 0.04 + }, + "/detected/image/0": { + "visible": false + }, + "/detected/image/3": { + "visible": false + }, + "/detected/pointcloud/3": { + "visible": true, + "pointSize": 1.5, + "pointShape": "cube", + "cubeSize": 0.1, + "flatColor": "#00fffaff", + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo" + }, + "/detected/image/1": { + "visible": false + }, + "/registered_scan": { + "visible": true, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo", + "pointShape": "circle", + "pointSize": 2 + }, + "/image/camera_info": { + "visible": true, + "distance": 2 + }, + "/map": { + "visible": true, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo", + "pointShape": "square", + "cubeSize": 0.13, + "explicitAlpha": 1, + "pointSize": 1, + "decayTime": 2 + }, + "/detection3d/markers": { + "visible": true, + "color": "#88ff00ff", + "showOutlines": true, + "selectedIdVariable": "" + }, + "/foxglove/scene_update": { + "visible": true + }, + "/scene_update": { + "visible": true, + "showOutlines": true, + "computeVertexNormals": true + }, + "/target": { + "visible": true, + "axisScale": 1 + }, + "/goal_pose": { + "visible": true, + "axisScale": 0.5 } }, "publish": { @@ -211,8 +411,8 @@ "poseEstimateThetaDeviation": 0.26179939 }, "imageMode": {}, - "foxglovePanelTitle": "test", - "followTf": "world" + "foxglovePanelTitle": "", + "followTf": "map" }, "Image!3mnp456": { "cameraState": { @@ -245,7 +445,7 @@ "enableStats": false, "transforms": { "showLabel": false, - "visible": false + "visible": true } }, "transforms": { @@ -264,13 +464,14 @@ }, "topics": { "/lidar": { - "visible": true, - "colorField": "_auto_distance", + "visible": false, + "colorField": "z", "colorMode": "colormap", "colorMap": "turbo", - "pointSize": 4, + 
"pointSize": 6, "explicitAlpha": 0.6, - "pointShape": "square" + "pointShape": "circle", + "cubeSize": 0.016 }, "/odom": { "visible": false @@ -281,6 +482,103 @@ "/global_costmap": { "visible": false, "minColor": "#ffffff00" + }, + "/detected_0": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointSize": 23, + "pointShape": "cube", + "cubeSize": 0.04, + "flatColor": "#ff0000ff", + "stixelsEnabled": false + }, + "/detected_1": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointSize": 20.51, + "flatColor": "#34ff00ff", + "pointShape": "cube", + "cubeSize": 0.04, + "cubeOutline": false + }, + "/filtered_pointcloud": { + "visible": true, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "rainbow", + "pointSize": 1.5, + "pointShape": "cube", + "flatColor": "#ff0000ff", + "cubeSize": 0.1 + }, + "/global_map": { + "visible": false, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo", + "pointShape": "cube", + "pointSize": 5 + }, + "/detected/pointcloud/1": { + "visible": false, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "cubeSize": 0.01, + "flatColor": "#00ff1eff", + "pointSize": 15, + "decayTime": 0, + "cubeOutline": true + }, + "/detected/pointcloud/2": { + "visible": false, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "circle", + "cubeSize": 0.1, + "flatColor": "#00fbffff", + "pointSize": 0.01 + }, + "/detected/pointcloud/0": { + "visible": false, + "colorField": "intensity", + "colorMode": "flat", + "colorMap": "turbo", + "pointShape": "cube", + "flatColor": "#ff0000ff", + "pointSize": 15, + "cubeOutline": true, + "cubeSize": 0.03 + }, + "/registered_scan": { + "visible": false, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo", + "pointShape": "circle", + "pointSize": 6.49 + }, + "/detection3d/markers": { + "visible": false + }, + "/foxglove/scene_update": { + "visible": true + }, + "/scene_update": { + "visible": false + }, + "/map": { + "visible": false, + "colorField": "z", + "colorMode": "colormap", + "colorMap": "turbo", + "pointSize": 8 } }, "layers": {}, @@ -305,40 +603,44 @@ } }, "synchronize": false, - "rotation": 0 + "rotation": 0, + "calibrationTopic": "/camera_info" }, "foxglovePanelTitle": "" }, - "StateTransitions!pu21x4": { + "Plot!3heo336": { "paths": [ { - "value": "/annotations.texts[1].text", - "timestampMethod": "receiveTime", - "label": "detection1" + "timestampMethod": "publishTime", + "value": "/image.header.stamp.sec", + "enabled": true, + "color": "#4e98e2", + "label": "image", + "showLine": false }, { - "value": "/annotations.texts[3].text", - "timestampMethod": "receiveTime", - "label": "detection2" - } - ], - "isSynced": true, - "showPoints": true, - "timeWindowMode": "automatic" - }, - "Plot!a1gj37": { - "paths": [ + "timestampMethod": "publishTime", + "value": "/map.header.stamp.sec", + "enabled": true, + "color": "#f5774d", + "label": "lidar", + "showLine": false + }, { - "timestampMethod": "receiveTime", - "value": "/annotations.points[0].points[0].x", + "timestampMethod": "publishTime", + "value": "/tf.transforms[0].header.stamp.sec", "enabled": true, - "color": "#4e98e2" + "color": "#f7df71", + "label": "tf", + "showLine": false }, { - "timestampMethod": "receiveTime", - "value": "/annotations.points[0].points[0].y", + "timestampMethod": "publishTime", + "value": "/odom.header.stamp.sec", "enabled": 
true, - "color": "#f5774d" + "color": "#5cd6a9", + "label": "odom", + "showLine": false } ], "showXAxisLabels": true, @@ -349,6 +651,161 @@ "isSynced": true, "xAxisVal": "timestamp", "sidebarDimension": 240 + }, + "Image!47pi3ov": { + "cameraState": { + "distance": 20, + "perspective": true, + "phi": 60, + "target": [ + 0, + 0, + 0 + ], + "targetOffset": [ + 0, + 0, + 0 + ], + "targetOrientation": [ + 0, + 0, + 0, + 1 + ], + "thetaOffset": 45, + "fovy": 45, + "near": 0.5, + "far": 5000 + }, + "followMode": "follow-pose", + "scene": {}, + "transforms": {}, + "topics": {}, + "layers": {}, + "publish": { + "type": "point", + "poseTopic": "/move_base_simple/goal", + "pointTopic": "/clicked_point", + "poseEstimateTopic": "/initialpose", + "poseEstimateXDeviation": 0.5, + "poseEstimateYDeviation": 0.5, + "poseEstimateThetaDeviation": 0.26179939 + }, + "imageMode": { + "imageTopic": "/detected/image/0" + } + }, + "Image!4kk50gw": { + "cameraState": { + "distance": 20, + "perspective": true, + "phi": 60, + "target": [ + 0, + 0, + 0 + ], + "targetOffset": [ + 0, + 0, + 0 + ], + "targetOrientation": [ + 0, + 0, + 0, + 1 + ], + "thetaOffset": 45, + "fovy": 45, + "near": 0.5, + "far": 5000 + }, + "followMode": "follow-pose", + "scene": {}, + "transforms": {}, + "topics": {}, + "layers": {}, + "publish": { + "type": "point", + "poseTopic": "/move_base_simple/goal", + "pointTopic": "/clicked_point", + "poseEstimateTopic": "/initialpose", + "poseEstimateXDeviation": 0.5, + "poseEstimateYDeviation": 0.5, + "poseEstimateThetaDeviation": 0.26179939 + }, + "imageMode": { + "imageTopic": "/detected/image/1" + } + }, + "Image!2348e0b": { + "cameraState": { + "distance": 20, + "perspective": true, + "phi": 60, + "target": [ + 0, + 0, + 0 + ], + "targetOffset": [ + 0, + 0, + 0 + ], + "targetOrientation": [ + 0, + 0, + 0, + 1 + ], + "thetaOffset": 45, + "fovy": 45, + "near": 0.5, + "far": 5000 + }, + "followMode": "follow-pose", + "scene": {}, + "transforms": {}, + "topics": {}, + "layers": {}, + "publish": { + "type": "point", + "poseTopic": "/move_base_simple/goal", + "pointTopic": "/clicked_point", + "poseEstimateTopic": "/initialpose", + "poseEstimateXDeviation": 0.5, + "poseEstimateYDeviation": 0.5, + "poseEstimateThetaDeviation": 0.26179939 + }, + "imageMode": { + "imageTopic": "/detected/image/2", + "synchronize": false + } + }, + "StateTransitions!pu21x4": { + "paths": [ + { + "value": "/annotations.texts[1].text", + "timestampMethod": "receiveTime", + "label": "detection1" + }, + { + "value": "/annotations.texts[3].text", + "timestampMethod": "receiveTime", + "label": "detection2" + }, + { + "value": "/annotations.texts[5].text", + "timestampMethod": "receiveTime", + "label": "detection3" + } + ], + "isSynced": true, + "showPoints": true, + "timeWindowMode": "automatic" } }, "globalVariables": {}, @@ -360,19 +817,33 @@ "tracks": [] }, "layout": { - "first": "3D!18i6zy7", + "first": { + "first": "3D!18i6zy7", + "second": "Image!3mnp456", + "direction": "row", + "splitPercentage": 47.265625 + }, "second": { - "first": "Image!3mnp456", + "first": "Plot!3heo336", "second": { - "first": "StateTransitions!pu21x4", - "second": "Plot!a1gj37", + "first": { + "first": "Image!47pi3ov", + "second": { + "first": "Image!4kk50gw", + "second": "Image!2348e0b", + "direction": "row" + }, + "direction": "row", + "splitPercentage": 33.06523681858802 + }, + "second": "StateTransitions!pu21x4", "direction": "column", - "splitPercentage": 32.661870503597136 + "splitPercentage": 86.63101604278076 }, - "direction": "column", - 
"splitPercentage": 59.451575262543756 + "direction": "row", + "splitPercentage": 46.39139486467731 }, - "direction": "row", - "splitPercentage": 55.3125 + "direction": "column", + "splitPercentage": 81.62970106075217 } } diff --git a/data/.lfs/replay_g1.tar.gz b/data/.lfs/replay_g1.tar.gz new file mode 100644 index 0000000000..67750bd0cf --- /dev/null +++ b/data/.lfs/replay_g1.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19ad1c53c4f8f9414c0921b94cd4c87e81bf0ad676881339f15ae2d8a8619311 +size 557410250 diff --git a/data/.lfs/replay_g1_run.tar.gz b/data/.lfs/replay_g1_run.tar.gz new file mode 100644 index 0000000000..86368ec788 --- /dev/null +++ b/data/.lfs/replay_g1_run.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00cf21f65a15994895150f74044f5d00d7aa873d24f071d249ecbd09cb8f2b26 +size 559554274 diff --git a/dimos/agents/memory/spatial_vector_db.py b/dimos/agents/memory/spatial_vector_db.py index afc77df12a..a4eefb792b 100644 --- a/dimos/agents/memory/spatial_vector_db.py +++ b/dimos/agents/memory/spatial_vector_db.py @@ -200,6 +200,9 @@ def _process_query_results(self, results) -> List[Dict]: processed_results = [] for i, vector_id in enumerate(results["ids"]): + if isinstance(vector_id, list) and not vector_id: + continue + lookup_id = vector_id[0] if isinstance(vector_id, list) else vector_id # Create the result dictionary with metadata regardless of image availability diff --git a/dimos/agents2/skills/navigation.py b/dimos/agents2/skills/navigation.py index c20293f60e..7a2dd65edd 100644 --- a/dimos/agents2/skills/navigation.py +++ b/dimos/agents2/skills/navigation.py @@ -14,20 +14,22 @@ import time from typing import Any, Optional + import cv2 from reactivex import Observable +from reactivex.disposable import CompositeDisposable, Disposable +from dimos.models.qwen.video_query import BBox from dimos.models.vl.qwen import QwenVlModel +from dimos.msgs.geometry_msgs import PoseStamped +from dimos.msgs.geometry_msgs.Vector3 import make_vector3 from dimos.msgs.sensor_msgs import Image from dimos.navigation.visual.query import get_object_bbox_from_image from dimos.protocol.skill.skill import SkillContainer, skill from dimos.robot.robot import UnitreeRobot from dimos.types.robot_location import RobotLocation -from dimos.models.qwen.video_query import BBox -from dimos.msgs.geometry_msgs import PoseStamped -from dimos.msgs.geometry_msgs.Vector3 import make_vector3 -from dimos.utils.transform_utils import euler_to_quaternion, quaternion_to_euler from dimos.utils.logging_config import setup_logger +from dimos.utils.transform_utils import euler_to_quaternion, quaternion_to_euler from dimos.navigation.bt_navigator.navigator import NavigatorState from reactivex.disposable import Disposable, CompositeDisposable @@ -203,11 +205,7 @@ def _get_bbox_for_current_frame(self, query: str) -> Optional[BBox]: if self._latest_image is None: return None - frame = cv2.cvtColor(self._latest_image.data, cv2.COLOR_RGB2BGR) - if frame is None: - return None - - return get_object_bbox_from_image(self._vl_model, frame, query) + return get_object_bbox_from_image(self._vl_model, self._latest_image, query) def _navigate_using_semantic_map(self, query: str) -> str: results = self._robot.spatial_memory.query_by_text(query) diff --git a/dimos/agents2/skills/ros_navigation.py b/dimos/agents2/skills/ros_navigation.py new file mode 100644 index 0000000000..d989f95b5d --- /dev/null +++ b/dimos/agents2/skills/ros_navigation.py @@ -0,0 +1,123 @@ +# Copyright 2025 
Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +from typing import Any, Optional +from dimos.msgs.geometry_msgs import PoseStamped +from dimos.msgs.geometry_msgs.Vector3 import make_vector3 +from dimos.protocol.skill.skill import SkillContainer, skill +from dimos.utils.logging_config import setup_logger +from dimos.utils.transform_utils import euler_to_quaternion + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from dimos.robot.unitree_webrtc.unitree_g1 import UnitreeG1 + +logger = setup_logger(__file__) + + +class RosNavigation(SkillContainer): + _robot: "UnitreeG1" + _started: bool + + def __init__(self, robot: "UnitreeG1"): + self._robot = robot + self._similarity_threshold = 0.23 + self._started = False + + def __enter__(self) -> "RosNavigation": + self._started = True + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._close_module() + return False + + @skill() + def navigate_with_text(self, query: str) -> str: + """Navigate to a location by querying the existing semantic map using natural language. + + CALL THIS SKILL FOR ONE SUBJECT AT A TIME. For example: "Go to the person wearing a blue shirt in the living room", + you should call this skill twice, once for the person wearing a blue shirt and once for the living room. + + Args: + query: Text query to search for in the semantic map + """ + + print("X" * 10000) + + if not self._started: + raise ValueError(f"{self} has not been started.") + + success_msg = self._navigate_using_semantic_map(query) + if success_msg: + return success_msg + + return "Failed to navigate." + + def _navigate_using_semantic_map(self, query: str) -> str: + results = self._robot.spatial_memory.query_by_text(query) + + if not results: + return f"No matching location found in semantic map for '{query}'" + + best_match = results[0] + + goal_pose = self._get_goal_pose_from_result(best_match) + + if not goal_pose: + return f"Found a result for '{query}' but it didn't have a valid position." 
+ + result = self._robot.nav.go_to(goal_pose) + + if not result: + return f"Failed to navigate for '{query}'" + + return f"Successfuly arrived at '{query}'" + + @skill() + def stop_movement(self) -> str: + """Immediatly stop moving.""" + + if not self._started: + raise ValueError(f"{self} has not been started.") + + self._robot.cancel_navigation() + + return "Stopped" + + def _get_goal_pose_from_result(self, result: dict[str, Any]) -> Optional[PoseStamped]: + similarity = 1.0 - (result.get("distance") or 1) + if similarity < self._similarity_threshold: + logger.warning( + f"Match found but similarity score ({similarity:.4f}) is below threshold ({self._similarity_threshold})" + ) + return None + + metadata = result.get("metadata") + if not metadata: + return None + + first = metadata[0] + pos_x = first.get("pos_x", 0) + pos_y = first.get("pos_y", 0) + theta = first.get("rot_z", 0) + + return PoseStamped( + ts=time.time(), + position=make_vector3(pos_x, pos_y, 0), + orientation=euler_to_quaternion(make_vector3(0, 0, theta)), + frame_id="map", + ) diff --git a/dimos/agents2/temp/webcam_agent.py b/dimos/agents2/temp/webcam_agent.py index 8e2538f832..fed01ed96f 100644 --- a/dimos/agents2/temp/webcam_agent.py +++ b/dimos/agents2/temp/webcam_agent.py @@ -23,24 +23,23 @@ import sys import time from pathlib import Path - -from dotenv import load_dotenv - -from dimos.agents2.cli.human import HumanInput - -# Add parent directories to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) - from threading import Thread import reactivex as rx import reactivex.operators as ops +from dotenv import load_dotenv from dimos.agents2 import Agent, Output, Reducer, Stream, skill +from dimos.agents2.cli.human import HumanInput from dimos.agents2.spec import Model, Provider from dimos.core import LCMTransport, Module, pLCMTransport, start -from dimos.hardware.webcam import ColorCameraModule, Webcam -from dimos.msgs.sensor_msgs import Image +from dimos.hardware.camera import zed +from dimos.hardware.camera.module import CameraModule +from dimos.hardware.camera.webcam import Webcam +from dimos.msgs.geometry_msgs import Quaternion, Transform, Vector3 + +# from dimos.hardware.webcam import ColorCameraModule, Webcam +from dimos.msgs.sensor_msgs import CameraInfo, Image from dimos.protocol.skill.test_coordinator import SkillContainerTest from dimos.robot.unitree_webrtc.unitree_go2 import UnitreeGo2 from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer @@ -110,7 +109,24 @@ def main(): ) testcontainer = dimos.deploy(SkillContainerTest) - webcam = dimos.deploy(ColorCameraModule, hardware=lambda: Webcam(camera_index=0)) + webcam = dimos.deploy( + CameraModule, + transform=Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + frame_id="base_link", + child_frame_id="camera_link", + ), + hardware=lambda: Webcam( + camera_index=0, + frequency=15, + stereo_slice="left", + camera_info=zed.CameraInfo.SingleWebcam, + ), + ) + + webcam.camera_info.transport = LCMTransport("/camera_info", CameraInfo) + webcam.image.transport = LCMTransport("/image", Image) webcam.start() diff --git a/dimos/core/module.py b/dimos/core/module.py index e235ba16e0..0385aad041 100644 --- a/dimos/core/module.py +++ b/dimos/core/module.py @@ -117,7 +117,8 @@ def _close_rpc(self): @property def tf(self): if self._tf is None: - self._tf = self.config.tf_transport() + # self._tf = self.config.tf_transport() + self._tf = LCMTF() return self._tf @tf.setter diff --git 
a/dimos/hardware/camera/module.py b/dimos/hardware/camera/module.py new file mode 100644 index 0000000000..2b2880b80a --- /dev/null +++ b/dimos/hardware/camera/module.py @@ -0,0 +1,127 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import queue +import time +from dataclasses import dataclass, field +from typing import Any, Callable, Generic, Literal, Optional, Protocol, TypeVar + +import reactivex as rx +from dimos_lcm.sensor_msgs import CameraInfo +from reactivex import operators as ops +from reactivex.disposable import Disposable +from reactivex.observable import Observable + +from dimos.agents2 import Output, Reducer, Stream, skill +from dimos.core import Module, Out, rpc +from dimos.core.module import Module, ModuleConfig +from dimos.hardware.camera.spec import ( + CameraHardware, +) +from dimos.hardware.camera.webcam import Webcam, WebcamConfig +from dimos.msgs.geometry_msgs import Quaternion, Transform, Vector3 +from dimos.msgs.sensor_msgs import Image +from dimos.msgs.sensor_msgs.Image import Image, sharpness_barrier + +default_transform = lambda: Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + frame_id="base_link", + child_frame_id="camera_link", +) + + +@dataclass +class CameraModuleConfig(ModuleConfig): + frame_id: str = "camera_link" + transform: Optional[Transform] = field(default_factory=default_transform) + hardware: Callable[[], CameraHardware] | CameraHardware = Webcam + + +class CameraModule(Module): + image: Out[Image] = None + camera_info: Out[CameraInfo] = None + + hardware: CameraHardware = None + _module_subscription: Optional[Disposable] = None + _camera_info_subscription: Optional[Disposable] = None + _skill_stream: Optional[Observable[Image]] = None + + default_config = CameraModuleConfig + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @rpc + def start(self): + if callable(self.config.hardware): + self.hardware = self.config.hardware() + else: + self.hardware = self.config.hardware + + if self._module_subscription: + return "already started" + + stream = self.hardware.image_stream().pipe(sharpness_barrier(5)) + + # camera_info_stream = self.camera_info_stream(frequency=5.0) + + def publish_info(camera_info: CameraInfo): + self.camera_info.publish(camera_info) + + if self.config.transform is None: + return + + camera_link = self.config.transform + camera_link.ts = camera_info.ts + camera_optical = Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=Quaternion(-0.5, 0.5, -0.5, 0.5), + frame_id="camera_link", + child_frame_id="camera_optical", + ts=camera_link.ts, + ) + + self.tf.publish(camera_link, camera_optical) + + self._camera_info_subscription = self.camera_info_stream().subscribe(publish_info) + self._module_subscription = stream.subscribe(self.image.publish) + + @skill(stream=Stream.passive, output=Output.image, reducer=Reducer.latest) + def video_stream(self) -> Image: + """implicit video stream skill""" + _queue = 
queue.Queue(maxsize=1) + self.hardware.image_stream().subscribe(_queue.put) + + for image in iter(_queue.get, None): + yield image + + def camera_info_stream(self, frequency: float = 5.0) -> Observable[CameraInfo]: + def camera_info(_) -> CameraInfo: + self.hardware.camera_info.ts = time.time() + return self.hardware.camera_info + + return rx.interval(1.0 / frequency).pipe(ops.map(camera_info)) + + def stop(self): + if self._module_subscription: + self._module_subscription.dispose() + self._module_subscription = None + if self._camera_info_subscription: + self._camera_info_subscription.dispose() + self._camera_info_subscription = None + # Also stop the hardware if it has a stop method + if self.hardware and hasattr(self.hardware, "stop"): + self.hardware.stop() + super().stop() diff --git a/dimos/hardware/camera/spec.py b/dimos/hardware/camera/spec.py new file mode 100644 index 0000000000..cc69db5d1c --- /dev/null +++ b/dimos/hardware/camera/spec.py @@ -0,0 +1,55 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC, abstractmethod, abstractproperty +from typing import Generic, Optional, Protocol, TypeVar + +from dimos_lcm.sensor_msgs import CameraInfo +from reactivex.observable import Observable + +from dimos.msgs.sensor_msgs import Image +from dimos.protocol.service import Configurable + + +class CameraConfig(Protocol): + frame_id_prefix: Optional[str] + + +CameraConfigT = TypeVar("CameraConfigT", bound=CameraConfig) + + +class CameraHardware(ABC, Configurable[CameraConfigT], Generic[CameraConfigT]): + @abstractmethod + def image_stream(self) -> Observable[Image]: + pass + + @abstractproperty + def camera_info(self) -> CameraInfo: + pass + + +# This is an example, feel free to change spec for stereo cameras +# e.g., separate camera_info or streams for left/right, etc. +class StereoCameraHardware(ABC, Configurable[CameraConfigT], Generic[CameraConfigT]): + @abstractmethod + def image_stream(self) -> Observable[Image]: + pass + + @abstractmethod + def depth_stream(self) -> Observable[Image]: + pass + + @abstractproperty + def camera_info(self) -> CameraInfo: + pass diff --git a/dimos/hardware/camera/test_webcam.py b/dimos/hardware/camera/test_webcam.py new file mode 100644 index 0000000000..0f6a509084 --- /dev/null +++ b/dimos/hardware/camera/test_webcam.py @@ -0,0 +1,108 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import time + +import pytest + +from dimos import core +from dimos.hardware.camera import zed +from dimos.hardware.camera.module import CameraModule +from dimos.hardware.camera.webcam import Webcam +from dimos.msgs.geometry_msgs import Quaternion, Transform, Vector3 +from dimos.msgs.sensor_msgs import CameraInfo, Image + + +@pytest.mark.tool +def test_streaming_single(): + dimos = core.start(1) + + camera = dimos.deploy( + CameraModule, + transform=Transform( + translation=Vector3(0.05, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + frame_id="sensor", + child_frame_id="camera_link", + ), + hardware=lambda: Webcam( + stereo_slice="left", + camera_index=0, + frequency=15, + camera_info=zed.CameraInfo.SingleWebcam, + ), + ) + + camera.image.transport = core.LCMTransport("/image1", Image) + camera.camera_info.transport = core.LCMTransport("/image1/camera_info", CameraInfo) + camera.start() + + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + camera.stop() + dimos.stop() + + +@pytest.mark.tool +def test_streaming_double(): + dimos = core.start(2) + + camera1 = dimos.deploy( + CameraModule, + transform=Transform( + translation=Vector3(0.05, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + frame_id="sensor", + child_frame_id="camera_link", + ), + hardware=lambda: Webcam( + stereo_slice="left", + camera_index=0, + frequency=15, + camera_info=zed.CameraInfo.SingleWebcam, + ), + ) + + camera2 = dimos.deploy( + CameraModule, + transform=Transform( + translation=Vector3(0.05, 0.0, 0.0), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + frame_id="sensor", + child_frame_id="camera_link", + ), + hardware=lambda: Webcam( + camera_index=4, + frequency=15, + stereo_slice="left", + camera_info=zed.CameraInfo.SingleWebcam, + ), + ) + + camera1.image.transport = core.LCMTransport("/image1", Image) + camera1.camera_info.transport = core.LCMTransport("/image1/camera_info", CameraInfo) + camera1.start() + camera2.image.transport = core.LCMTransport("/image2", Image) + camera2.camera_info.transport = core.LCMTransport("/image2/camera_info", CameraInfo) + camera2.start() + + try: + while True: + time.sleep(1) + except KeyboardInterrupt: + camera1.stop() + camera2.stop() + dimos.stop() diff --git a/dimos/hardware/webcam.py b/dimos/hardware/camera/webcam.py similarity index 69% rename from dimos/hardware/webcam.py rename to dimos/hardware/camera/webcam.py index d7413ccb1b..87ba492d6e 100644 --- a/dimos/hardware/webcam.py +++ b/dimos/hardware/camera/webcam.py @@ -15,57 +15,36 @@ import queue import threading import time -from abc import ABC, abstractmethod, abstractproperty from dataclasses import dataclass, field from functools import cache from typing import Any, Callable, Generic, Literal, Optional, Protocol, TypeVar import cv2 -import numpy as np from dimos_lcm.sensor_msgs import CameraInfo from reactivex import create from reactivex.observable import Observable -from dimos.agents2 import Output, Reducer, Stream, skill -from dimos.core import Module, Out, rpc -from dimos.core.module import DaskModule, ModuleConfig +from dimos.hardware.camera.spec import ( + CameraConfig, + CameraHardware, +) from dimos.msgs.sensor_msgs import Image from dimos.msgs.sensor_msgs.Image import ImageFormat -from dimos.protocol.service import Configurable, Service from dimos.utils.reactive import backpressure -class CameraConfig(Protocol): - frame_id_prefix: Optional[str] - - -CameraConfigT = TypeVar("CameraConfigT", bound=CameraConfig) - - -# StereoCamera interface, for cameras that provide standard -# 
color, depth, pointcloud, and pose messages -class ColorCameraHardware(Configurable[CameraConfigT], Generic[CameraConfigT]): - @abstractmethod - def color_stream(self) -> Observable[Image]: - pass - - @abstractproperty - def camera_info(self) -> CameraInfo: - pass - - @dataclass class WebcamConfig(CameraConfig): - camera_index: int = 0 + camera_index: int = 0 # /dev/videoN frame_width: int = 640 frame_height: int = 480 - frequency: int = 10 + frequency: int = 15 camera_info: CameraInfo = field(default_factory=CameraInfo) frame_id_prefix: Optional[str] = None stereo_slice: Optional[Literal["left", "right"]] = None # For stereo cameras -class Webcam(ColorCameraHardware[WebcamConfig]): +class Webcam(CameraHardware[WebcamConfig]): default_config = WebcamConfig def __init__(self, *args, **kwargs): @@ -76,7 +55,7 @@ def __init__(self, *args, **kwargs): self._observer = None @cache - def color_stream(self) -> Observable[Image]: + def image_stream(self) -> Observable[Image]: """Create an observable that starts/stops camera on subscription""" def subscribe(observer, scheduler=None): @@ -117,7 +96,6 @@ def start(self): self._capture_thread = threading.Thread(target=self._capture_loop, daemon=True) self._capture_thread.start() - @rpc def stop(self): """Stop capturing frames""" # Signal thread to stop @@ -191,56 +169,6 @@ def _capture_loop(self): @property def camera_info(self) -> CameraInfo: - """Return the camera info from config""" return self.config.camera_info def emit(self, image: Image): ... - - def image_stream(self): - return self.image.observable() - - -@dataclass -class ColorCameraModuleConfig(ModuleConfig): - hardware: Callable[[], ColorCameraHardware] | ColorCameraHardware = Webcam - - -class ColorCameraModule(DaskModule): - image: Out[Image] = None - hardware: ColorCameraHardware = None - _module_subscription: Optional[Any] = None # Subscription disposable - _skill_stream: Optional[Observable[Image]] = None - default_config = ColorCameraModuleConfig - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - @rpc - def start(self): - if callable(self.config.hardware): - self.hardware = self.config.hardware() - else: - self.hardware = self.config.hardware - - if self._module_subscription: - return "already started" - stream = self.hardware.color_stream() - self._module_subscription = stream.subscribe(self.image.publish) - - @skill(stream=Stream.passive, output=Output.image, reducer=Reducer.latest) - def video_stream(self) -> Image: - """implicit video stream skill""" - _queue = queue.Queue(maxsize=1) - self.hardware.color_stream().subscribe(_queue.put) - - for image in iter(_queue.get, None): - yield image - - def stop(self): - if self._module_subscription: - self._module_subscription.dispose() - self._module_subscription = None - # Also stop the hardware if it has a stop method - if self.hardware and hasattr(self.hardware, "stop"): - self.hardware.stop() - super().stop() diff --git a/dimos/hardware/test_webcam.py b/dimos/hardware/test_webcam.py deleted file mode 100644 index e6d26d2b8d..0000000000 --- a/dimos/hardware/test_webcam.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time - -import pytest - -from dimos.core import LCMTransport, start -from dimos.hardware.webcam import ColorCameraModule, Webcam -from dimos.msgs.sensor_msgs import Image - - -@pytest.mark.tool -def test_basic(): - webcam = Webcam() - subscription = webcam.color_stream().subscribe( - on_next=lambda img: print(f"Got image: {img.width}x{img.height}"), - on_error=lambda e: print(f"Error: {e}"), - on_completed=lambda: print("Stream completed"), - ) - - # Keep the subscription alive for a few seconds - try: - time.sleep(3) - finally: - # Clean disposal - subscription.dispose() - print("Test completed") - - -@pytest.mark.tool -def test_module(): - dimos = start(1) - # Deploy ColorCameraModule, not Webcam directly - camera_module = dimos.deploy( - ColorCameraModule, - hardware=lambda: Webcam(camera_index=4, frequency=30, stereo_slice="left"), - ) - camera_module.image.transport = LCMTransport("/image", Image) - camera_module.start() - - test_transport = LCMTransport("/image", Image) - test_transport.subscribe(print) - - time.sleep(60) - - print("shutting down") - camera_module.stop() - time.sleep(1.0) - dimos.stop() - - -if __name__ == "__main__": - test_module() diff --git a/dimos/mapping/types.py b/dimos/mapping/types.py index 1ee2263e21..3ceb64c56b 100644 --- a/dimos/mapping/types.py +++ b/dimos/mapping/types.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. + from dataclasses import dataclass from typing import Optional, TypeAlias diff --git a/dimos/models/vl/__init__.py b/dimos/models/vl/__init__.py new file mode 100644 index 0000000000..8cb0a7944b --- /dev/null +++ b/dimos/models/vl/__init__.py @@ -0,0 +1,2 @@ +from dimos.models.vl.base import VlModel +from dimos.models.vl.qwen import QwenVlModel diff --git a/dimos/models/vl/base.py b/dimos/models/vl/base.py index c9df67b5fd..faab96363d 100644 --- a/dimos/models/vl/base.py +++ b/dimos/models/vl/base.py @@ -2,7 +2,9 @@ import numpy as np +from dimos.msgs.sensor_msgs import Image + class VlModel(ABC): @abstractmethod - def query(self, image: np.ndarray, query: str) -> str: ... + def query(self, image: Image | np.ndarray, query: str) -> str: ... 
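The vl/base.py hunk above widens VlModel.query to accept either the dimos Image type or a raw numpy array, and the qwen.py hunk that follows implements that contract. A minimal call-pattern sketch under stated assumptions — the import paths shown elsewhere in this diff (dimos.models.vl.QwenVlModel, dimos.msgs.sensor_msgs.Image, Image.from_numpy), an API key for the OpenAI-compatible Qwen endpoint already configured in the environment, and a placeholder uint8 frame — not part of the patch itself:

import numpy as np

from dimos.models.vl import QwenVlModel
from dimos.msgs.sensor_msgs import Image

# Assumption: with api_key=None the client falls back to an environment variable.
model = QwenVlModel()

# Preferred path after this change: pass the dimos Image straight through;
# the model encodes it internally via Image.to_base64().
frame = Image.from_numpy(np.zeros((480, 640, 3), dtype=np.uint8))  # placeholder frame
print(model.query(frame, "What objects are visible?"))

# Legacy path: a raw numpy array still works, but qwen.py now emits a
# DeprecationWarning and wraps it with Image.from_numpy() before encoding.
print(model.query(np.zeros((480, 640, 3), dtype=np.uint8), "What objects are visible?"))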
diff --git a/dimos/models/vl/qwen.py b/dimos/models/vl/qwen.py index 4944f081ae..05ad4715c5 100644 --- a/dimos/models/vl/qwen.py +++ b/dimos/models/vl/qwen.py @@ -1,12 +1,11 @@ import os -import base64 -import io from typing import Optional + import numpy as np from openai import OpenAI -from PIL import Image from dimos.models.vl.base import VlModel +from dimos.msgs.sensor_msgs import Image class QwenVlModel(VlModel): @@ -27,11 +26,19 @@ def __init__(self, api_key: Optional[str] = None, model_name: str = "qwen2.5-vl- api_key=api_key, ) - def query(self, image: np.ndarray, query: str) -> str: - pil_image = Image.fromarray(image.astype("uint8")) - buffered = io.BytesIO() - pil_image.save(buffered, format="PNG") - img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8") + def query(self, image: Image | np.ndarray, query: str) -> str: + if isinstance(image, np.ndarray): + import warnings + + warnings.warn( + "QwenVlModel.query should receive standard dimos Image type, not a numpy array", + DeprecationWarning, + stacklevel=2, + ) + + image = Image.from_numpy(image) + + img_base64 = image.to_base64() response = self._client.chat.completions.create( model=self._model_name, diff --git a/dimos/msgs/foxglove_msgs/Color.py b/dimos/msgs/foxglove_msgs/Color.py new file mode 100644 index 0000000000..30362f837a --- /dev/null +++ b/dimos/msgs/foxglove_msgs/Color.py @@ -0,0 +1,50 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import hashlib +from dimos_lcm.foxglove_msgs import Color as LCMColor + + +class Color(LCMColor): + """Color with convenience methods.""" + + @classmethod + def from_string(cls, name: str, alpha: float = 0.2) -> Color: + """Generate a consistent color from a string using hash function. + + Args: + name: String to generate color from + alpha: Transparency value (0.0-1.0) + + Returns: + Color instance with deterministic RGB values + """ + # Hash the string to get consistent values + hash_obj = hashlib.md5(name.encode()) + hash_bytes = hash_obj.digest() + + # Use first 3 bytes for RGB (0-255) + r = hash_bytes[0] / 255.0 + g = hash_bytes[1] / 255.0 + b = hash_bytes[2] / 255.0 + + # Create and return color instance + color = cls() + color.r = r + color.g = g + color.b = b + color.a = alpha + return color diff --git a/dimos/msgs/geometry_msgs/Transform.py b/dimos/msgs/geometry_msgs/Transform.py index 468d009519..4db4c929a7 100644 --- a/dimos/msgs/geometry_msgs/Transform.py +++ b/dimos/msgs/geometry_msgs/Transform.py @@ -87,6 +87,9 @@ def lcm_transform(self) -> LCMTransformStamped: ), ) + def apply(self, other: "Transform") -> "Transform": + return self.__add__(other) + def __add__(self, other: "Transform") -> "Transform": """Compose two transforms (transform composition). 
@@ -248,7 +251,7 @@ def from_pose(cls, frame_id: str, pose: "Pose | PoseStamped") -> "Transform": else: raise TypeError(f"Expected Pose or PoseStamped, got {type(pose).__name__}") - def to_pose(self) -> "PoseStamped": + def to_pose(self, **kwargs) -> "PoseStamped": """Create a Transform from a Pose or PoseStamped. Args: @@ -262,9 +265,12 @@ def to_pose(self) -> "PoseStamped": # Handle both Pose and PoseStamped return PoseStamped( - position=self.translation, - orientation=self.rotation, - frame_id=self.frame_id, + **{ + "position": self.translation, + "orientation": self.rotation, + "frame_id": self.frame_id, + }, + **kwargs, ) def to_matrix(self) -> "np.ndarray": diff --git a/dimos/msgs/geometry_msgs/Vector3.py b/dimos/msgs/geometry_msgs/Vector3.py index 72e2fcef36..2eb204693b 100644 --- a/dimos/msgs/geometry_msgs/Vector3.py +++ b/dimos/msgs/geometry_msgs/Vector3.py @@ -155,6 +155,10 @@ def getArrow(): return f"{getArrow()} Vector {self.__repr__()}" + def agent_encode(self) -> dict: + """Encode the vector for agent communication.""" + return {"x": self.x, "y": self.y, "z": self.z} + def serialize(self) -> dict: """Serialize the vector to a tuple.""" return {"type": "vector", "c": (self.x, self.y, self.z)} diff --git a/dimos/msgs/sensor_msgs/Image.py b/dimos/msgs/sensor_msgs/Image.py index b79e07639f..30c74fd243 100644 --- a/dimos/msgs/sensor_msgs/Image.py +++ b/dimos/msgs/sensor_msgs/Image.py @@ -13,17 +13,15 @@ # limitations under the License. import base64 +import functools import time from dataclasses import dataclass, field -from datetime import timedelta from enum import Enum from typing import Literal, Optional, Tuple, TypedDict import cv2 import numpy as np import reactivex as rx - -# Import LCM types from dimos_lcm.sensor_msgs.Image import Image as LCMImage from dimos_lcm.std_msgs.Header import Header from reactivex import operators as ops @@ -31,6 +29,12 @@ from reactivex.scheduler import ThreadPoolScheduler from dimos.types.timestamped import Timestamped, TimestampedBufferCollection, to_human_readable +from dimos.utils.reactive import quality_barrier + +try: + from sensor_msgs.msg import Image as ROSImage +except ImportError: + ROSImage = None class ImageFormat(Enum): @@ -297,6 +301,7 @@ def crop(self, x: int, y: int, width: int, height: int) -> "Image": ts=self.ts, ) + @functools.cached_property def sharpness(self) -> float: """ Compute the Tenengrad focus measure for an image. @@ -497,6 +502,67 @@ def _parse_encoding(encoding: str) -> dict: return encoding_map[encoding] + @classmethod + def from_ros_msg(cls, ros_msg: ROSImage) -> "Image": + """Create an Image from a ROS sensor_msgs/Image message. 
+ + Args: + ros_msg: ROS Image message + + Returns: + Image instance + """ + # Convert timestamp from ROS header + ts = ros_msg.header.stamp.sec + (ros_msg.header.stamp.nanosec / 1_000_000_000) + + # Parse encoding to determine format and data type + format_info = cls._parse_encoding(ros_msg.encoding) + + # Convert data from ROS message (array.array) to numpy array + data_array = np.frombuffer(ros_msg.data, dtype=format_info["dtype"]) + + # Reshape to image dimensions + if format_info["channels"] == 1: + data_array = data_array.reshape((ros_msg.height, ros_msg.width)) + else: + data_array = data_array.reshape( + (ros_msg.height, ros_msg.width, format_info["channels"]) + ) + + # Crop to center 1/3 of the image (simulate 120-degree FOV from 360-degree) + original_width = data_array.shape[1] + crop_width = original_width // 3 + start_x = (original_width - crop_width) // 2 + end_x = start_x + crop_width + + # Crop the image horizontally to center 1/3 + if len(data_array.shape) == 2: + # Grayscale image + data_array = data_array[:, start_x:end_x] + else: + # Color image + data_array = data_array[:, start_x:end_x, :] + + # Fix color channel order: if ROS sends RGB but we expect BGR, swap channels + # ROS typically uses rgb8 encoding, but OpenCV/our system expects BGR + if format_info["format"] == ImageFormat.RGB: + # Convert RGB to BGR by swapping channels + if len(data_array.shape) == 3 and data_array.shape[2] == 3: + data_array = data_array[:, :, [2, 1, 0]] # RGB -> BGR + format_info["format"] = ImageFormat.BGR + elif format_info["format"] == ImageFormat.RGBA: + # Convert RGBA to BGRA by swapping channels + if len(data_array.shape) == 3 and data_array.shape[2] == 4: + data_array = data_array[:, :, [2, 1, 0, 3]] # RGBA -> BGRA + format_info["format"] = ImageFormat.BGRA + + return cls( + data=data_array, + format=format_info["format"], + frame_id=ros_msg.header.frame_id, + ts=ts, + ) + def __repr__(self) -> str: """String representation.""" return ( @@ -522,18 +588,8 @@ def __len__(self) -> int: def sharpness_window(target_frequency: float, source: Observable[Image]) -> Observable[Image]: - window = TimestampedBufferCollection(1.0 / target_frequency) - source.subscribe(window.add) - - thread_scheduler = ThreadPoolScheduler(max_workers=1) - - def find_best(*argv): - if not window._items: - return None + raise NotImplementedError("use sharpness_barrier instead") - found = max(window._items, key=lambda x: x.sharpness()) - return found - return rx.interval(1.0 / target_frequency).pipe( - ops.observe_on(thread_scheduler), ops.map(find_best), ops.filter(lambda x: x is not None) - ) +def sharpness_barrier(target_frequency: float): + return quality_barrier(lambda x: x.sharpness, target_frequency) diff --git a/dimos/msgs/sensor_msgs/Joy.py b/dimos/msgs/sensor_msgs/Joy.py new file mode 100644 index 0000000000..e528b304b6 --- /dev/null +++ b/dimos/msgs/sensor_msgs/Joy.py @@ -0,0 +1,181 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import time +from typing import List, TypeAlias + +from dimos_lcm.sensor_msgs import Joy as LCMJoy + +try: + from sensor_msgs.msg import Joy as ROSJoy +except ImportError: + ROSJoy = None + +from plum import dispatch + +from dimos.types.timestamped import Timestamped + +# Types that can be converted to/from Joy +JoyConvertable: TypeAlias = ( + tuple[List[float], List[int]] | dict[str, List[float] | List[int]] | LCMJoy +) + + +def sec_nsec(ts): + s = int(ts) + return [s, int((ts - s) * 1_000_000_000)] + + +class Joy(Timestamped): + msg_name = "sensor_msgs.Joy" + ts: float + frame_id: str + axes: List[float] + buttons: List[int] + + @dispatch + def __init__( + self, + ts: float = 0.0, + frame_id: str = "", + axes: List[float] | None = None, + buttons: List[int] | None = None, + ) -> None: + """Initialize a Joy message. + + Args: + ts: Timestamp in seconds + frame_id: Frame ID for the message + axes: List of axis values (typically -1.0 to 1.0) + buttons: List of button states (0 or 1) + """ + self.ts = ts if ts != 0 else time.time() + self.frame_id = frame_id + self.axes = axes if axes is not None else [] + self.buttons = buttons if buttons is not None else [] + + @dispatch + def __init__(self, joy_tuple: tuple[List[float], List[int]]) -> None: + """Initialize from a tuple of (axes, buttons).""" + self.ts = time.time() + self.frame_id = "" + self.axes = list(joy_tuple[0]) + self.buttons = list(joy_tuple[1]) + + @dispatch + def __init__(self, joy_dict: dict[str, List[float] | List[int]]) -> None: + """Initialize from a dictionary with 'axes' and 'buttons' keys.""" + self.ts = joy_dict.get("ts", time.time()) + self.frame_id = joy_dict.get("frame_id", "") + self.axes = list(joy_dict.get("axes", [])) + self.buttons = list(joy_dict.get("buttons", [])) + + @dispatch + def __init__(self, joy: Joy) -> None: + """Initialize from another Joy (copy constructor).""" + self.ts = joy.ts + self.frame_id = joy.frame_id + self.axes = list(joy.axes) + self.buttons = list(joy.buttons) + + @dispatch + def __init__(self, lcm_joy: LCMJoy) -> None: + """Initialize from an LCM Joy message.""" + self.ts = lcm_joy.header.stamp.sec + (lcm_joy.header.stamp.nsec / 1_000_000_000) + self.frame_id = lcm_joy.header.frame_id + self.axes = list(lcm_joy.axes) + self.buttons = list(lcm_joy.buttons) + + def lcm_encode(self) -> bytes: + lcm_msg = LCMJoy() + [lcm_msg.header.stamp.sec, lcm_msg.header.stamp.nsec] = sec_nsec(self.ts) + lcm_msg.header.frame_id = self.frame_id + lcm_msg.axes_length = len(self.axes) + lcm_msg.axes = self.axes + lcm_msg.buttons_length = len(self.buttons) + lcm_msg.buttons = self.buttons + return lcm_msg.lcm_encode() + + @classmethod + def lcm_decode(cls, data: bytes) -> Joy: + lcm_msg = LCMJoy.lcm_decode(data) + return cls( + ts=lcm_msg.header.stamp.sec + (lcm_msg.header.stamp.nsec / 1_000_000_000), + frame_id=lcm_msg.header.frame_id, + axes=list(lcm_msg.axes) if lcm_msg.axes else [], + buttons=list(lcm_msg.buttons) if lcm_msg.buttons else [], + ) + + def __str__(self) -> str: + return ( + f"Joy(axes={len(self.axes)} values, buttons={len(self.buttons)} values, " + f"frame_id='{self.frame_id}')" + ) + + def __repr__(self) -> str: + return ( + f"Joy(ts={self.ts}, frame_id='{self.frame_id}', " + f"axes={self.axes}, buttons={self.buttons})" + ) + + def __eq__(self, other) -> bool: + """Check if two Joy messages are equal.""" + if not isinstance(other, Joy): + return False + return ( + self.axes == other.axes + and self.buttons == other.buttons + and self.frame_id == 
other.frame_id + ) + + @classmethod + def from_ros_msg(cls, ros_msg: ROSJoy) -> "Joy": + """Create a Joy from a ROS sensor_msgs/Joy message. + + Args: + ros_msg: ROS Joy message + + Returns: + Joy instance + """ + # Convert timestamp from ROS header + ts = ros_msg.header.stamp.sec + (ros_msg.header.stamp.nanosec / 1_000_000_000) + + return cls( + ts=ts, + frame_id=ros_msg.header.frame_id, + axes=list(ros_msg.axes), + buttons=list(ros_msg.buttons), + ) + + def to_ros_msg(self) -> ROSJoy: + """Convert to a ROS sensor_msgs/Joy message. + + Returns: + ROS Joy message + """ + ros_msg = ROSJoy() + + # Set header + ros_msg.header.frame_id = self.frame_id + ros_msg.header.stamp.sec = int(self.ts) + ros_msg.header.stamp.nanosec = int((self.ts - int(self.ts)) * 1_000_000_000) + + # Set axes and buttons + ros_msg.axes = self.axes + ros_msg.buttons = self.buttons + + return ros_msg diff --git a/dimos/msgs/sensor_msgs/PointCloud2.py b/dimos/msgs/sensor_msgs/PointCloud2.py index 8004032ceb..d81c8d0198 100644 --- a/dimos/msgs/sensor_msgs/PointCloud2.py +++ b/dimos/msgs/sensor_msgs/PointCloud2.py @@ -14,6 +14,7 @@ from __future__ import annotations +import functools import struct import time from typing import Optional @@ -28,6 +29,8 @@ from dimos_lcm.sensor_msgs.PointField import PointField from dimos_lcm.std_msgs.Header import Header +from dimos.msgs.geometry_msgs import Vector3 + # Import ROS types try: from sensor_msgs.msg import PointCloud2 as ROSPointCloud2 @@ -73,14 +76,83 @@ def from_numpy( pcd.points = o3d.utility.Vector3dVector(points) return cls(pointcloud=pcd, ts=timestamp, frame_id=frame_id) + def __str__(self) -> str: + return f"PointCloud2(frame_id='{self.frame_id}', num_points={len(self.pointcloud.points)})" + + @functools.cached_property + def center(self) -> Vector3: + """Calculate the center of the pointcloud in world frame.""" + center = np.asarray(self.pointcloud.points).mean(axis=0) + return Vector3(*center) + def points(self): return self.pointcloud.points + def __add__(self, other: PointCloud2) -> PointCloud2: + """Combine two PointCloud2 instances into one. + + The resulting point cloud contains points from both inputs. + The frame_id and timestamp are taken from the first point cloud. + + Args: + other: Another PointCloud2 instance to combine with + + Returns: + New PointCloud2 instance containing combined points + """ + if not isinstance(other, PointCloud2): + raise ValueError("Can only add PointCloud2 to another PointCloud2") + + return PointCloud2( + pointcloud=self.pointcloud + other.pointcloud, + frame_id=self.frame_id, + ts=max(self.ts, other.ts), + ) + # TODO what's the usual storage here? is it already numpy? 
def as_numpy(self) -> np.ndarray: """Get points as numpy array.""" return np.asarray(self.pointcloud.points) + @functools.cache + def get_axis_aligned_bounding_box(self) -> o3d.geometry.AxisAlignedBoundingBox: + """Get axis-aligned bounding box of the point cloud.""" + return self.pointcloud.get_axis_aligned_bounding_box() + + @functools.cache + def get_oriented_bounding_box(self) -> o3d.geometry.OrientedBoundingBox: + """Get oriented bounding box of the point cloud.""" + return self.pointcloud.get_oriented_bounding_box() + + @functools.cache + def get_bounding_box_dimensions(self) -> tuple[float, float, float]: + """Get dimensions (width, height, depth) of axis-aligned bounding box.""" + bbox = self.get_axis_aligned_bounding_box() + extent = bbox.get_extent() + return tuple(extent) + + def bounding_box_intersects(self, other: "PointCloud2") -> bool: + # Get axis-aligned bounding boxes + bbox1 = self.get_axis_aligned_bounding_box() + bbox2 = other.get_axis_aligned_bounding_box() + + # Get min and max bounds + min1 = bbox1.get_min_bound() + max1 = bbox1.get_max_bound() + min2 = bbox2.get_min_bound() + max2 = bbox2.get_max_bound() + + # Check overlap in all three dimensions + # Boxes intersect if they overlap in ALL dimensions + return ( + min1[0] <= max2[0] + and max1[0] >= min2[0] + and min1[1] <= max2[1] + and max1[1] >= min2[1] + and min1[2] <= max2[2] + and max1[2] >= min2[2] + ) + def lcm_encode(self, frame_id: Optional[str] = None) -> bytes: """Convert to LCM PointCloud2 message.""" msg = LCMPointCloud2() diff --git a/dimos/msgs/sensor_msgs/__init__.py b/dimos/msgs/sensor_msgs/__init__.py index a7afafe2f2..9a8a7b54fe 100644 --- a/dimos/msgs/sensor_msgs/__init__.py +++ b/dimos/msgs/sensor_msgs/__init__.py @@ -1,3 +1,4 @@ from dimos.msgs.sensor_msgs.Image import Image, ImageFormat from dimos.msgs.sensor_msgs.PointCloud2 import PointCloud2 from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo +from dimos.msgs.sensor_msgs.Joy import Joy diff --git a/dimos/msgs/sensor_msgs/test_Joy.py b/dimos/msgs/sensor_msgs/test_Joy.py new file mode 100644 index 0000000000..fd11624b08 --- /dev/null +++ b/dimos/msgs/sensor_msgs/test_Joy.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +import time + +try: + from sensor_msgs.msg import Joy as ROSJoy + from std_msgs.msg import Header as ROSHeader + + ROS_AVAILABLE = True +except ImportError: + ROSJoy = None + ROSHeader = None + ROS_AVAILABLE = False + +from dimos.msgs.sensor_msgs.Joy import Joy + + +def test_lcm_encode_decode(): + """Test LCM encode/decode preserves Joy data.""" + print("Testing Joy LCM encode/decode...") + + # Create test joy message with sample gamepad data + original = Joy( + ts=1234567890.123456789, + frame_id="gamepad", + axes=[0.5, -0.25, 1.0, -1.0, 0.0, 0.75], # 6 axes (e.g., left/right sticks + triggers) + buttons=[1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0], # 12 buttons + ) + + # Encode to LCM bytes + encoded = original.lcm_encode() + assert isinstance(encoded, bytes) + assert len(encoded) > 0 + + # Decode back + decoded = Joy.lcm_decode(encoded) + + # Verify all fields match + assert abs(decoded.ts - original.ts) < 1e-9 + assert decoded.frame_id == original.frame_id + assert decoded.axes == original.axes + assert decoded.buttons == original.buttons + + print("✓ Joy LCM encode/decode test passed") + + +def test_initialization_methods(): + """Test various initialization methods for Joy.""" + print("Testing Joy initialization methods...") + + # Test default initialization + joy1 = Joy() + assert joy1.axes == [] + assert joy1.buttons == [] + assert joy1.frame_id == "" + assert joy1.ts > 0 # Should have current time + + # Test full initialization + joy2 = Joy(ts=1234567890.0, frame_id="xbox_controller", axes=[0.1, 0.2, 0.3], buttons=[1, 0, 1]) + assert joy2.ts == 1234567890.0 + assert joy2.frame_id == "xbox_controller" + assert joy2.axes == [0.1, 0.2, 0.3] + assert joy2.buttons == [1, 0, 1] + + # Test tuple initialization + joy3 = Joy(([0.5, -0.5], [1, 1, 0])) + assert joy3.axes == [0.5, -0.5] + assert joy3.buttons == [1, 1, 0] + + # Test dict initialization + joy4 = Joy({"axes": [0.7, 0.8], "buttons": [0, 1], "frame_id": "ps4_controller"}) + assert joy4.axes == [0.7, 0.8] + assert joy4.buttons == [0, 1] + assert joy4.frame_id == "ps4_controller" + + # Test copy constructor + joy5 = Joy(joy2) + assert joy5.ts == joy2.ts + assert joy5.frame_id == joy2.frame_id + assert joy5.axes == joy2.axes + assert joy5.buttons == joy2.buttons + assert joy5 is not joy2 # Different objects + + print("✓ Joy initialization methods test passed") + + +def test_equality(): + """Test Joy equality comparison.""" + print("Testing Joy equality...") + + joy1 = Joy(ts=1000.0, frame_id="controller1", axes=[0.5, -0.5], buttons=[1, 0, 1]) + + joy2 = Joy(ts=1000.0, frame_id="controller1", axes=[0.5, -0.5], buttons=[1, 0, 1]) + + joy3 = Joy( + ts=1000.0, + frame_id="controller2", # Different frame_id + axes=[0.5, -0.5], + buttons=[1, 0, 1], + ) + + joy4 = Joy( + ts=1000.0, + frame_id="controller1", + axes=[0.6, -0.5], # Different axes + buttons=[1, 0, 1], + ) + + # Same content should be equal + assert joy1 == joy2 + + # Different frame_id should not be equal + assert joy1 != joy3 + + # Different axes should not be equal + assert joy1 != joy4 + + # Different type should not be equal + assert joy1 != "not a joy" + assert joy1 != 42 + + print("✓ Joy equality test passed") + + +def test_string_representation(): + """Test Joy string representations.""" + print("Testing Joy string representations...") + + joy = Joy( + ts=1234567890.123, + frame_id="test_controller", + axes=[0.1, -0.2, 0.3, 0.4], + buttons=[1, 0, 1, 0, 0, 1], + ) + + # Test __str__ + str_repr = str(joy) + assert "Joy" in str_repr + assert "axes=4 values" in 
str_repr + assert "buttons=6 values" in str_repr + assert "test_controller" in str_repr + + # Test __repr__ + repr_str = repr(joy) + assert "Joy" in repr_str + assert "1234567890.123" in repr_str + assert "test_controller" in repr_str + assert "[0.1, -0.2, 0.3, 0.4]" in repr_str + assert "[1, 0, 1, 0, 0, 1]" in repr_str + + print("✓ Joy string representation test passed") + + +@pytest.mark.skipif(not ROS_AVAILABLE, reason="ROS not available") +def test_ros_conversion(): + """Test conversion to/from ROS Joy messages.""" + print("Testing Joy ROS conversion...") + + # Create a ROS Joy message + ros_msg = ROSJoy() + ros_msg.header = ROSHeader() + ros_msg.header.stamp.sec = 1234567890 + ros_msg.header.stamp.nanosec = 123456789 + ros_msg.header.frame_id = "ros_gamepad" + ros_msg.axes = [0.25, -0.75, 0.0, 1.0, -1.0] + ros_msg.buttons = [1, 1, 0, 0, 1, 0, 1, 0] + + # Convert from ROS + joy = Joy.from_ros_msg(ros_msg) + assert abs(joy.ts - 1234567890.123456789) < 1e-9 + assert joy.frame_id == "ros_gamepad" + assert joy.axes == [0.25, -0.75, 0.0, 1.0, -1.0] + assert joy.buttons == [1, 1, 0, 0, 1, 0, 1, 0] + + # Convert back to ROS + ros_msg2 = joy.to_ros_msg() + assert ros_msg2.header.frame_id == "ros_gamepad" + assert ros_msg2.header.stamp.sec == 1234567890 + assert abs(ros_msg2.header.stamp.nanosec - 123456789) < 100 # Allow small rounding + assert list(ros_msg2.axes) == [0.25, -0.75, 0.0, 1.0, -1.0] + assert list(ros_msg2.buttons) == [1, 1, 0, 0, 1, 0, 1, 0] + + print("✓ Joy ROS conversion test passed") + + +def test_edge_cases(): + """Test Joy with edge cases.""" + print("Testing Joy edge cases...") + + # Empty axes and buttons + joy1 = Joy(axes=[], buttons=[]) + assert joy1.axes == [] + assert joy1.buttons == [] + encoded = joy1.lcm_encode() + decoded = Joy.lcm_decode(encoded) + assert decoded.axes == [] + assert decoded.buttons == [] + + # Large number of axes and buttons + many_axes = [float(i) / 100.0 for i in range(20)] + many_buttons = [i % 2 for i in range(32)] + joy2 = Joy(axes=many_axes, buttons=many_buttons) + assert len(joy2.axes) == 20 + assert len(joy2.buttons) == 32 + encoded = joy2.lcm_encode() + decoded = Joy.lcm_decode(encoded) + # Check axes with floating point tolerance + assert len(decoded.axes) == len(many_axes) + for i, (a, b) in enumerate(zip(decoded.axes, many_axes)): + assert abs(a - b) < 1e-6, f"Axis {i}: {a} != {b}" + assert decoded.buttons == many_buttons + + # Extreme axis values + extreme_axes = [-1.0, 1.0, 0.0, -0.999999, 0.999999] + joy3 = Joy(axes=extreme_axes) + assert joy3.axes == extreme_axes + + print("✓ Joy edge cases test passed") + + +if __name__ == "__main__": + test_lcm_encode_decode() + test_initialization_methods() + test_equality() + test_string_representation() + if ROS_AVAILABLE: + test_ros_conversion() + test_edge_cases() + print("\nAll Joy tests passed! 
✓") diff --git a/dimos/msgs/sensor_msgs/test_PointCloud2.py b/dimos/msgs/sensor_msgs/test_PointCloud2.py index ac115462be..cb18d6fd9d 100644 --- a/dimos/msgs/sensor_msgs/test_PointCloud2.py +++ b/dimos/msgs/sensor_msgs/test_PointCloud2.py @@ -234,6 +234,80 @@ def test_ros_conversion(): print("\n✓ All ROS conversion tests passed!") +def test_bounding_box_intersects(): + """Test bounding_box_intersects method with various scenarios.""" + # Test 1: Overlapping boxes + pc1 = PointCloud2.from_numpy(np.array([[0, 0, 0], [2, 2, 2]])) + pc2 = PointCloud2.from_numpy(np.array([[1, 1, 1], [3, 3, 3]])) + assert pc1.bounding_box_intersects(pc2) == True + assert pc2.bounding_box_intersects(pc1) == True # Should be symmetric + + # Test 2: Non-overlapping boxes + pc3 = PointCloud2.from_numpy(np.array([[0, 0, 0], [1, 1, 1]])) + pc4 = PointCloud2.from_numpy(np.array([[2, 2, 2], [3, 3, 3]])) + assert pc3.bounding_box_intersects(pc4) == False + assert pc4.bounding_box_intersects(pc3) == False + + # Test 3: Touching boxes (edge case - should be True) + pc5 = PointCloud2.from_numpy(np.array([[0, 0, 0], [1, 1, 1]])) + pc6 = PointCloud2.from_numpy(np.array([[1, 1, 1], [2, 2, 2]])) + assert pc5.bounding_box_intersects(pc6) == True + assert pc6.bounding_box_intersects(pc5) == True + + # Test 4: One box completely inside another + pc7 = PointCloud2.from_numpy(np.array([[0, 0, 0], [3, 3, 3]])) + pc8 = PointCloud2.from_numpy(np.array([[1, 1, 1], [2, 2, 2]])) + assert pc7.bounding_box_intersects(pc8) == True + assert pc8.bounding_box_intersects(pc7) == True + + # Test 5: Boxes overlapping only in 2 dimensions (not all 3) + pc9 = PointCloud2.from_numpy(np.array([[0, 0, 0], [2, 2, 1]])) + pc10 = PointCloud2.from_numpy(np.array([[1, 1, 2], [3, 3, 3]])) + assert pc9.bounding_box_intersects(pc10) == False + assert pc10.bounding_box_intersects(pc9) == False + + # Test 6: Real-world detection scenario with floating point coordinates + detection1_points = np.array( + [[-3.5, -0.3, 0.1], [-3.3, -0.2, 0.1], [-3.5, -0.3, 0.3], [-3.3, -0.2, 0.3]] + ) + pc_det1 = PointCloud2.from_numpy(detection1_points) + + detection2_points = np.array( + [[-3.4, -0.25, 0.15], [-3.2, -0.15, 0.15], [-3.4, -0.25, 0.35], [-3.2, -0.15, 0.35]] + ) + pc_det2 = PointCloud2.from_numpy(detection2_points) + + assert pc_det1.bounding_box_intersects(pc_det2) == True + + # Test 7: Single point clouds + pc_single1 = PointCloud2.from_numpy(np.array([[1.0, 1.0, 1.0]])) + pc_single2 = PointCloud2.from_numpy(np.array([[1.0, 1.0, 1.0]])) + pc_single3 = PointCloud2.from_numpy(np.array([[2.0, 2.0, 2.0]])) + + # Same point should intersect + assert pc_single1.bounding_box_intersects(pc_single2) == True + # Different points should not intersect + assert pc_single1.bounding_box_intersects(pc_single3) == False + + # Test 8: Empty point clouds + pc_empty1 = PointCloud2.from_numpy(np.array([]).reshape(0, 3)) + pc_empty2 = PointCloud2.from_numpy(np.array([]).reshape(0, 3)) + pc_nonempty = PointCloud2.from_numpy(np.array([[1.0, 1.0, 1.0]])) + + # Empty clouds should handle gracefully (Open3D returns inf bounds) + # This might raise an exception or return False - we should handle gracefully + try: + result = pc_empty1.bounding_box_intersects(pc_empty2) + # If no exception, verify behavior is consistent + assert isinstance(result, bool) + except: + # If it raises an exception, that's also acceptable for empty clouds + pass + + print("✓ All bounding box intersection tests passed!") + + if __name__ == "__main__": test_lcm_encode_decode() test_ros_conversion() + 
test_bounding_box_intersects() diff --git a/dimos/msgs/sensor_msgs/test_image.py b/dimos/msgs/sensor_msgs/test_image.py index 7a3c84a8b6..0af559904a 100644 --- a/dimos/msgs/sensor_msgs/test_image.py +++ b/dimos/msgs/sensor_msgs/test_image.py @@ -14,8 +14,9 @@ import numpy as np import pytest +from reactivex import operators as ops -from dimos.msgs.sensor_msgs.Image import Image, ImageFormat, sharpness_window +from dimos.msgs.sensor_msgs.Image import Image, ImageFormat, sharpness_barrier, sharpness_window from dimos.utils.data import get_data from dimos.utils.testing import TimedSensorReplay @@ -65,7 +66,7 @@ def test_opencv_conversion(img: Image): @pytest.mark.tool -def test_sharpness_detector(): +def test_sharpness_stream(): get_data("unitree_office_walk") # Preload data for testing video_store = TimedSensorReplay( "unitree_office_walk/video", autocast=lambda x: Image.from_numpy(x).to_rgb() @@ -74,55 +75,85 @@ def test_sharpness_detector(): cnt = 0 for image in video_store.iterate(): cnt = cnt + 1 - print(image.sharpness()) + print(image.sharpness) if cnt > 30: return -@pytest.mark.tool -def test_sharpness_sliding_window_foxglove(): +def test_sharpness_barrier(): import time + from unittest.mock import MagicMock + + # Create mock images with known sharpness values + # This avoids loading real data from disk + mock_images = [] + sharpness_values = [0.3711, 0.3241, 0.3067, 0.2583, 0.3665] # Just 5 images for 1 window + + for i, sharp in enumerate(sharpness_values): + img = MagicMock() + img.sharpness = sharp + img.ts = 1758912038.208 + i * 0.01 # Simulate timestamps + mock_images.append(img) + + # Track what goes into windows and what comes out + start_wall_time = None + window_contents = [] # List of (wall_time, image) + emitted_images = [] + + def track_input(img): + """Track all images going into sharpness_barrier with wall-clock time""" + nonlocal start_wall_time + wall_time = time.time() + if start_wall_time is None: + start_wall_time = wall_time + relative_time = wall_time - start_wall_time + window_contents.append((relative_time, img)) + return img + + def track_output(img): + """Track what sharpness_barrier emits""" + emitted_images.append(img) + + # Use 20Hz frequency (0.05s windows) for faster test + # Emit images at 100Hz to get ~5 per window + from reactivex import from_iterable, interval + + source = from_iterable(mock_images).pipe( + ops.zip(interval(0.01)), # 100Hz emission rate + ops.map(lambda x: x[0]), # Extract just the image + ) - from dimos.msgs.geometry_msgs import Vector3 - from dimos.protocol.pubsub.lcmpubsub import LCM, Topic - - lcm = LCM() - lcm.start() - - ping = 0 - sharp_topic = Topic("/sharp", Image) - all_topic = Topic("/all", Image) - sharpness_topic = Topic("/sharpness", Vector3) - - get_data("unitree_office_walk") # Preload data for testing - video_stream = TimedSensorReplay( - "unitree_office_walk/video", autocast=lambda x: Image.from_numpy(x).to_rgb() - ).stream() + source.pipe( + ops.do_action(track_input), # Track inputs + sharpness_barrier(20), # 20Hz = 0.05s windows + ops.do_action(track_output), # Track outputs + ).run() - # Publish all images to all_topic - video_stream.subscribe(lambda x: lcm.publish(all_topic, x)) + # Only need 0.08s for 1 full window at 20Hz plus buffer + time.sleep(0.08) - def sharpness_vector(x: Image): - nonlocal ping - sharpness = x.sharpness() - if ping: - y = 1 - ping = ping - 1 - else: - y = 0 + # Verify we got correct emissions + assert len(emitted_images) >= 1, f"Expected at least 1 emission, got {len(emitted_images)}" 
- return Vector3([sharpness, y, 0]) + # Group inputs by wall-clock windows and verify we got the sharpest + window_duration = 0.05 # 20Hz - video_stream.subscribe(lambda x: lcm.publish(sharpness_topic, sharpness_vector(x))) + # Test just the first window + for window_idx in range(min(1, len(emitted_images))): + window_start = window_idx * window_duration + window_end = window_start + window_duration - def pub_sharp(x: Image): - nonlocal ping - ping = 3 - lcm.publish(sharp_topic, x) + # Get all images that arrived during this wall-clock window + window_imgs = [ + img for wall_time, img in window_contents if window_start <= wall_time < window_end + ] - sharpness_window( - 1, - source=video_stream, - ).subscribe(pub_sharp) + if window_imgs: + max_sharp = max(img.sharpness for img in window_imgs) + emitted_sharp = emitted_images[window_idx].sharpness - time.sleep(120) + # Verify we emitted the sharpest + assert abs(emitted_sharp - max_sharp) < 0.0001, ( + f"Window {window_idx}: Emitted image (sharp={emitted_sharp}) " + f"is not the sharpest (max={max_sharp})" + ) diff --git a/dimos/msgs/std_msgs/Bool.py b/dimos/msgs/std_msgs/Bool.py new file mode 100644 index 0000000000..6af250277e --- /dev/null +++ b/dimos/msgs/std_msgs/Bool.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Bool message type.""" + +from typing import ClassVar + +from dimos_lcm.std_msgs import Bool as LCMBool + +try: + from std_msgs.msg import Bool as ROSBool +except ImportError: + ROSBool = None + + +class Bool(LCMBool): + """ROS-compatible Bool message.""" + + msg_name = "std_msgs.Bool" + + def __init__(self, data: bool = False): + """Initialize Bool with data value.""" + self.data = data + + @classmethod + def from_ros_msg(cls, ros_msg: ROSBool) -> "Bool": + """Create a Bool from a ROS std_msgs/Bool message. + + Args: + ros_msg: ROS Bool message + + Returns: + Bool instance + """ + return cls(data=ros_msg.data) + + def to_ros_msg(self) -> ROSBool: + """Convert to a ROS std_msgs/Bool message. + + Returns: + ROS Bool message + """ + if ROSBool is None: + raise ImportError("ROS std_msgs not available") + ros_msg = ROSBool() + ros_msg.data = bool(self.data) + return ros_msg diff --git a/dimos/msgs/std_msgs/__init__.py b/dimos/msgs/std_msgs/__init__.py index 3ed93fa77d..898b1035b5 100644 --- a/dimos/msgs/std_msgs/__init__.py +++ b/dimos/msgs/std_msgs/__init__.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from .Bool import Bool from .Header import Header from .Int32 import Int32 -__all__ = ["Header", "Int32"] +__all__ = ["Bool", "Header", "Int32"] diff --git a/dimos/navigation/visual/query.py b/dimos/navigation/visual/query.py index 42ec9623ef..7f54664c31 100644 --- a/dimos/navigation/visual/query.py +++ b/dimos/navigation/visual/query.py @@ -13,25 +13,23 @@ # limitations under the License. 
from typing import Optional -import numpy as np + from dimos.models.qwen.video_query import BBox from dimos.models.vl.base import VlModel +from dimos.msgs.sensor_msgs import Image from dimos.utils.generic import extract_json_from_llm_response def get_object_bbox_from_image( - vl_model: VlModel, image_data: np.ndarray, object_description: str + vl_model: VlModel, image: Image, object_description: str ) -> Optional[BBox]: - if not isinstance(image_data, np.ndarray): - raise ValueError("Frame must be a numpy array") - prompt = ( f"Look at this image and find the '{object_description}'. " "Return ONLY a JSON object with format: {'name': 'object_name', 'bbox': [x1, y1, x2, y2]} " "where x1,y1 is the top-left and x2,y2 is the bottom-right corner of the bounding box. If not found, return None." ) - response = vl_model.query(image_data, prompt) + response = vl_model.query(image, prompt) result = extract_json_from_llm_response(response) if not result: diff --git a/dimos/perception/detection2d/conftest.py b/dimos/perception/detection2d/conftest.py index c471965287..b19cdcbd00 100644 --- a/dimos/perception/detection2d/conftest.py +++ b/dimos/perception/detection2d/conftest.py @@ -12,21 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -import functools -from typing import Optional, TypedDict +from typing import Callable, Optional, TypedDict, Union import pytest from dimos_lcm.foxglove_msgs.ImageAnnotations import ImageAnnotations -from dimos_lcm.sensor_msgs import CameraInfo, PointCloud2 +from dimos_lcm.foxglove_msgs.SceneUpdate import SceneUpdate +from dimos_lcm.visualization_msgs.MarkerArray import MarkerArray -from dimos.core import start -from dimos.core.transport import LCMTransport from dimos.msgs.geometry_msgs import Transform +from dimos.msgs.sensor_msgs import CameraInfo from dimos.msgs.sensor_msgs.Image import Image from dimos.perception.detection2d.module2D import Detection2DModule from dimos.perception.detection2d.module3D import Detection3DModule -from dimos.perception.detection2d.type import ImageDetections3D -from dimos.protocol.service import lcmservice as lcm +from dimos.perception.detection2d.moduleDB import ObjectDBModule +from dimos.perception.detection2d.type import ( + Detection2D, + Detection3D, + ImageDetections2D, + ImageDetections3D, +) from dimos.protocol.tf import TF from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule from dimos.robot.unitree_webrtc.type.lidar import LidarMessage @@ -44,123 +48,162 @@ class Moment(TypedDict, total=False): tf: TF annotations: Optional[ImageAnnotations] detections: Optional[ImageDetections3D] + markers: Optional[MarkerArray] + scene_update: Optional[SceneUpdate] + + +class Moment2D(Moment): + detections2d: ImageDetections2D + + +class Moment3D(Moment): + detections3d: ImageDetections3D + + +@pytest.fixture +def tf(): + t = TF() + yield t + t.stop() @pytest.fixture -def dimos_cluster(): - dimos = start(5) - yield dimos - dimos.stop() +def get_moment(tf): + def moment_provider(**kwargs) -> Moment: + seek = kwargs.get("seek", 10.0) + data_dir = "unitree_go2_lidar_corrected" + get_data(data_dir) -@pytest.fixture(scope="function") -def moment(): - data_dir = "unitree_go2_lidar_corrected" - get_data(data_dir) + lidar_frame = TimedSensorReplay(f"{data_dir}/lidar").find_closest_seek(seek) - seek = 10 + image_frame = TimedSensorReplay( + f"{data_dir}/video", + ).find_closest(lidar_frame.ts) - lidar_frame = 
TimedSensorReplay(f"{data_dir}/lidar").find_closest_seek(seek) + image_frame.frame_id = "camera_optical" - image_frame = TimedSensorReplay( - f"{data_dir}/video", - ).find_closest(lidar_frame.ts) + odom_frame = TimedSensorReplay(f"{data_dir}/odom", autocast=Odometry.from_msg).find_closest( + lidar_frame.ts + ) - image_frame.frame_id = "camera_optical" + transforms = ConnectionModule._odom_to_tf(odom_frame) + + tf.receive_transform(*transforms) + return { + "odom_frame": odom_frame, + "lidar_frame": lidar_frame, + "image_frame": image_frame, + "camera_info": ConnectionModule._camera_info(), + "transforms": transforms, + "tf": tf, + } + + return moment_provider - odom_frame = TimedSensorReplay(f"{data_dir}/odom", autocast=Odometry.from_msg).find_closest( - lidar_frame.ts - ) - transforms = ConnectionModule._odom_to_tf(odom_frame) - - tf = TF() - tf.publish(*transforms) - - yield { - "odom_frame": odom_frame, - "lidar_frame": lidar_frame, - "image_frame": image_frame, - "camera_info": ConnectionModule._camera_info(), - "transforms": transforms, - "tf": tf, - } - - # Cleanup - tf.stop() - - -@pytest.fixture(scope="function") -def publish_lcm(): - def publish(moment: Moment): - lcm.autoconf() - - transports = [] - - try: - lidar_frame_transport: LCMTransport = LCMTransport("/lidar", LidarMessage) - lidar_frame_transport.publish(moment.get("lidar_frame")) - transports.append(lidar_frame_transport) - - image_frame_transport: LCMTransport = LCMTransport("/image", Image) - image_frame_transport.publish(moment.get("image_frame")) - transports.append(image_frame_transport) - - odom_frame_transport: LCMTransport = LCMTransport("/odom", Odometry) - odom_frame_transport.publish(moment.get("odom_frame")) - transports.append(odom_frame_transport) - - camera_info_transport: LCMTransport = LCMTransport("/camera_info", CameraInfo) - camera_info_transport.publish(moment.get("camera_info")) - transports.append(camera_info_transport) - - annotations = moment.get("annotations") - if annotations: - annotations_transport: LCMTransport = LCMTransport("/annotations", ImageAnnotations) - annotations_transport.publish(annotations) - transports.append(annotations_transport) - - detections = moment.get("detections") - if detections: - for i, detection in enumerate(detections): - detections_transport: LCMTransport = LCMTransport( - f"/detected/pointcloud/{i}", PointCloud2 - ) - detections_transport.publish(detection.pointcloud) - transports.append(detections_transport) - - detections_image_transport: LCMTransport = LCMTransport( - f"/detected/image/{i}", Image - ) - detections_image_transport.publish(detection.cropped_image()) - transports.append(detections_image_transport) - finally: - # Cleanup all transports immediately after publishing - for transport in transports: - if transport._started: - transport.lcm.stop() - - return publish - - -@pytest.fixture(scope="function") -def detections2d(moment: Moment): +@pytest.fixture +def detection2d(get_moment_2d) -> Detection2D: + moment = get_moment_2d(seek=10.0) + assert len(moment["detections2d"]) > 0, "No detections found in the moment" + return moment["detections2d"][0] + + +@pytest.fixture +def detection3d(get_moment_3d) -> Detection3D: + moment = get_moment_3d(seek=10.0) + assert len(moment["detections3d"]) > 0, "No detections found in the moment" + print(moment["detections3d"]) + return moment["detections3d"][0] + + +@pytest.fixture +def get_moment_2d(get_moment) -> Callable[[], Moment2D]: module = Detection2DModule() - yield 
module.process_image_frame(moment["image_frame"]) + + def moment_provider(**kwargs) -> Moment2D: + moment = get_moment(**kwargs) + detections = module.process_image_frame(moment.get("image_frame")) + + return { + **moment, + "detections2d": detections, + } + + yield moment_provider module._close_module() -@pytest.fixture(scope="function") -def detections3d(moment: Moment): +@pytest.fixture +def get_moment_3d(get_moment_2d) -> Callable[[], Moment2D]: + module = None + + def moment_provider(**kwargs) -> Moment2D: + nonlocal module + moment = get_moment_2d(**kwargs) + + module = Detection3DModule(camera_info=moment["camera_info"]) + + camera_transform = moment["tf"].get("camera_optical", moment.get("lidar_frame").frame_id) + if camera_transform is None: + raise ValueError("No camera_optical transform in tf") + + return { + **moment, + "detections3d": module.process_frame( + moment["detections2d"], moment["lidar_frame"], camera_transform + ), + } + + yield moment_provider + print("Closing 3D detection module", module) + module._close_module() + + +@pytest.fixture +def object_db_module(get_moment): + """Create and populate an ObjectDBModule with detections from multiple frames.""" module2d = Detection2DModule() - detections2d = module2d.process_image_frame(moment["image_frame"]) - camera_transform = moment["tf"].get("camera_optical", "world") - if camera_transform is None: - raise ValueError("No camera_optical transform in tf") + module3d = Detection3DModule(camera_info=ConnectionModule._camera_info()) + moduleDB = ObjectDBModule( + camera_info=ConnectionModule._camera_info(), + goto=lambda obj_id: None, # No-op for testing + ) + + # Process 5 frames to build up object history + for i in range(5): + seek_value = 10.0 + (i * 2) + moment = get_moment(seek=seek_value) + + # Process 2D detections + imageDetections2d = module2d.process_image_frame(moment["image_frame"]) - module3d = Detection3DModule(camera_info=moment["camera_info"]) + # Get camera transform + camera_transform = moment["tf"].get("camera_optical", moment.get("lidar_frame").frame_id) - yield module3d.process_frame(detections2d, moment["lidar_frame"], camera_transform) + # Process 3D detections + imageDetections3d = module3d.process_frame( + imageDetections2d, moment["lidar_frame"], camera_transform + ) + # Add to database + moduleDB.add_detections(imageDetections3d) + + yield moduleDB module2d._close_module() module3d._close_module() + moduleDB._close_module() + + +@pytest.fixture +def first_object(object_db_module): + """Get the first object from the database.""" + objects = list(object_db_module.objects.values()) + assert len(objects) > 0, "No objects found in database" + return objects[0] + + +@pytest.fixture +def all_objects(object_db_module): + """Get all objects from the database.""" + return list(object_db_module.objects.values()) diff --git a/dimos/perception/detection2d/module2D.py b/dimos/perception/detection2d/module2D.py index 92d1c0612b..82fb181be9 100644 --- a/dimos/perception/detection2d/module2D.py +++ b/dimos/perception/detection2d/module2D.py @@ -12,55 +12,158 @@ # See the License for the specific language governing permissions and # limitations under the License. 
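+"""Detection2DModule: runs a pluggable 2D detector (YOLO by default) plus optional VLM bounding-box queries on a sharpness-gated image stream."""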
import functools -from typing import Any, Callable, List, Optional +import json +import time +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Callable, Optional +import numpy as np from dimos_lcm.foxglove_msgs.ImageAnnotations import ( ImageAnnotations, ) from reactivex import operators as ops from reactivex.observable import Observable +from reactivex.subject import Subject from dimos.core import In, Module, Out, rpc +from dimos.models.vl import QwenVlModel, VlModel from dimos.msgs.sensor_msgs import Image +from dimos.msgs.sensor_msgs.Image import sharpness_barrier from dimos.msgs.vision_msgs import Detection2DArray -from dimos.perception.detection2d.type import Detection2D, ImageDetections2D +from dimos.perception.detection2d.type import ( + Detection2D, + ImageDetections2D, + InconvinientDetectionFormat, +) from dimos.perception.detection2d.yolo_2d_det import Yolo2DDetector from dimos.utils.reactive import backpressure +class Detector(ABC): + @abstractmethod + def process_image(self, image: np.ndarray) -> InconvinientDetectionFormat: ... + + +@dataclass +class Config: + detector: Optional[Callable[[Any], Detector]] = Yolo2DDetector + max_freq: float = 0.5 # hz + vlmodel: VlModel = QwenVlModel + + class Detection2DModule(Module): + config: Config + detector: Detector + image: In[Image] = None # type: ignore detections: Out[Detection2DArray] = None # type: ignore annotations: Out[ImageAnnotations] = None # type: ignore - _initDetector = Yolo2DDetector + detected_image_0: Out[Image] = None # type: ignore + detected_image_1: Out[Image] = None # type: ignore + detected_image_2: Out[Image] = None # type: ignore - def __init__(self, *args, detector=Optional[Callable[[Any], Any]], **kwargs): + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - if detector: - self._detectorClass = detector - self.detector = self._initDetector() + self.config: Config = Config(**kwargs) + self.detector = self.config.detector() + self.vlmodel = self.config.vlmodel() + self.vlm_detections_subject = Subject() + + def vlm_query(self, query: str) -> ImageDetections2D: + image = self.sharp_image_stream().pipe(ops.take(1)).run() + + full_query = f"""show me bounding boxes in pixels for this query: `{query}` + + format should be: + `[ + [label, x1, y1, x2, y2] + ... + ]` + + (etc, multiple matches are possible) + + If there's no match return `[]`. 
Label is whatever you think is appropriate + + Only respond with the coordinates, no other text.""" + + response = self.vlmodel.query(image, full_query) + coords = json.loads(response) + + imageDetections = ImageDetections2D(image) + + for track_id, detection_list in enumerate(coords): + if len(detection_list) != 5: + continue + name = detection_list[0] + bbox = list(map(float, detection_list[1:])) + imageDetections.detections.append( + Detection2D( + bbox=bbox, + track_id=track_id, + class_id=-100, + confidence=1.0, + name=name, + ts=time.time(), + image=image, + ) + ) + + print("vlm detected", imageDetections) + # Emit the VLM detections to the subject + self.vlm_detections_subject.on_next(imageDetections) + + return imageDetections def process_image_frame(self, image: Image) -> ImageDetections2D: - detections = ImageDetections2D.from_detector( + print("Processing image frame for detections", image) + return ImageDetections2D.from_detector( image, self.detector.process_image(image.to_opencv()) ) - return detections @functools.cache - def detection_stream(self) -> Observable[ImageDetections2D]: - return backpressure(self.image.observable().pipe(ops.map(self.process_image_frame))) + def sharp_image_stream(self) -> Observable[Image]: + return backpressure( + self.image.observable().pipe( + sharpness_barrier(self.config.max_freq), + ) + ) + + @functools.cache + def detection_stream_2d(self) -> Observable[ImageDetections2D]: + # self.vlm_detections_subject + # Regular detection stream from the detector + regular_detections = self.sharp_image_stream().pipe(ops.map(self.process_image_frame)) + # Merge with VL model detections + return backpressure(regular_detections.pipe(ops.merge(self.vlm_detections_subject))) @rpc def start(self): - self.detection_stream().subscribe( - lambda det: self.detections.publish(det.to_ros_detection2d_array()) - ) + # self.detection_stream_2d().subscribe( + # lambda det: self.detections.publish(det.to_ros_detection2d_array()) + # ) - self.detection_stream().subscribe( + def publish_cropped_images(detections: ImageDetections2D): + for index, detection in enumerate(detections[:3]): + image_topic = getattr(self, "detected_image_" + str(index)) + image_topic.publish(detection.cropped_image()) + + self.detection_stream_2d().subscribe( lambda det: self.annotations.publish(det.to_foxglove_annotations()) ) + def publish_cropped(detections: ImageDetections2D): + for index, detection in enumerate(detections[:3]): + image_topic = getattr(self, "detected_image_" + str(index)) + image_topic.publish(detection.cropped_image()) + + self.detection_stream_2d().subscribe(publish_cropped) + @rpc def stop(self): ... diff --git a/dimos/perception/detection2d/module3D.py b/dimos/perception/detection2d/module3D.py index 98a1dc14af..0ad3517bf5 100644 --- a/dimos/perception/detection2d/module3D.py +++ b/dimos/perception/detection2d/module3D.py @@ -12,32 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
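+"""Detection3DModule: lifts 2D detections into 3D by pairing them with a time-aligned point cloud and the camera model."""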
-import time -from typing import List, Optional, Tuple -import numpy as np -from dimos_lcm.foxglove_msgs.ImageAnnotations import ( - ImageAnnotations, -) from dimos_lcm.sensor_msgs import CameraInfo from reactivex import operators as ops +from reactivex.observable import Observable from dimos.core import In, Out, rpc from dimos.msgs.geometry_msgs import Transform from dimos.msgs.sensor_msgs import Image, PointCloud2 -from dimos.msgs.vision_msgs import Detection2DArray from dimos.perception.detection2d.module2D import Detection2DModule - -# from dimos.perception.detection2d.detic import Detic2DDetector from dimos.perception.detection2d.type import ( - Detection2D, ImageDetections2D, ImageDetections3D, ) - -# Type aliases for clarity -ImageDetections = Tuple[Image, List[Detection2D]] -ImageDetection = Tuple[Image, Detection2D] +from dimos.perception.detection2d.type.detection3d import Detection3D +from dimos.types.timestamped import align_timestamped +from dimos.utils.reactive import backpressure class Detection3DModule(Detection2DModule): @@ -45,164 +35,19 @@ class Detection3DModule(Detection2DModule): image: In[Image] = None # type: ignore pointcloud: In[PointCloud2] = None # type: ignore - # type: ignore - detections: Out[Detection2DArray] = None # type: ignore - annotations: Out[ImageAnnotations] = None # type: ignore detected_pointcloud_0: Out[PointCloud2] = None # type: ignore detected_pointcloud_1: Out[PointCloud2] = None # type: ignore detected_pointcloud_2: Out[PointCloud2] = None # type: ignore - detected_image_0: Out[Image] = None # type: ignore - detected_image_1: Out[Image] = None # type: ignore - detected_image_2: Out[Image] = None # type: ignore + + detection_3d_stream: Observable[ImageDetections3D] = None def __init__(self, camera_info: CameraInfo, *args, **kwargs): - self.camera_info = camera_info super().__init__(*args, **kwargs) - - def detect(self, image: Image) -> ImageDetections: - detections = Detection2D.from_detector( - self.detector.process_image(image.to_opencv()), ts=image.ts - ) - return (image, detections) - - def project_points_to_camera( - self, - points_3d: np.ndarray, - camera_matrix: np.ndarray, - extrinsics: Transform, - ) -> Tuple[np.ndarray, np.ndarray]: - """Project 3D points to 2D camera coordinates.""" - # Transform points from world to camera_optical frame - points_homogeneous = np.hstack([points_3d, np.ones((points_3d.shape[0], 1))]) - extrinsics_matrix = extrinsics.to_matrix() - points_camera = (extrinsics_matrix @ points_homogeneous.T).T - - # Filter out points behind the camera - valid_mask = points_camera[:, 2] > 0 - points_camera = points_camera[valid_mask] - - # Project to 2D - points_2d_homogeneous = (camera_matrix @ points_camera[:, :3].T).T - points_2d = points_2d_homogeneous[:, :2] / points_2d_homogeneous[:, 2:3] - - return points_2d, valid_mask - - def filter_points_in_detections( - self, - detections: ImageDetections2D, - pointcloud: PointCloud2, - world_to_camera_transform: Transform, - ) -> List[Optional[PointCloud2]]: - """Filter lidar points that fall within detection bounding boxes.""" - # Extract camera parameters - camera_info = self.camera_info - fx, fy, cx = camera_info.K[0], camera_info.K[4], camera_info.K[2] - cy = camera_info.K[5] - image_width = camera_info.width - image_height = camera_info.height - - camera_matrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) - - # Convert pointcloud to numpy array - lidar_points = pointcloud.as_numpy() - - # Project all points to camera frame - points_2d_all, valid_mask = 
self.project_points_to_camera( - lidar_points, camera_matrix, world_to_camera_transform - ) - valid_3d_points = lidar_points[valid_mask] - points_2d = points_2d_all.copy() - - # Filter points within image bounds - in_image_mask = ( - (points_2d[:, 0] >= 0) - & (points_2d[:, 0] < image_width) - & (points_2d[:, 1] >= 0) - & (points_2d[:, 1] < image_height) - ) - points_2d = points_2d[in_image_mask] - valid_3d_points = valid_3d_points[in_image_mask] - - filtered_pointclouds: List[Optional[PointCloud2]] = [] - - for detection in detections: - # Extract bbox from Detection2D object - bbox = detection.bbox - x_min, y_min, x_max, y_max = bbox - - # Find points within this detection box (with small margin) - margin = 5 # pixels - in_box_mask = ( - (points_2d[:, 0] >= x_min - margin) - & (points_2d[:, 0] <= x_max + margin) - & (points_2d[:, 1] >= y_min - margin) - & (points_2d[:, 1] <= y_max + margin) - ) - - detection_points = valid_3d_points[in_box_mask] - - # Create PointCloud2 message for this detection - if detection_points.shape[0] > 0: - detection_pointcloud = PointCloud2.from_numpy( - detection_points, - frame_id=pointcloud.frame_id, - timestamp=pointcloud.ts, - ) - filtered_pointclouds.append(detection_pointcloud) - else: - filtered_pointclouds.append(None) - - return filtered_pointclouds - - def combine_pointclouds(self, pointcloud_list: List[PointCloud2]) -> PointCloud2: - """Combine multiple pointclouds into a single one.""" - # Filter out None values - valid_pointclouds = [pc for pc in pointcloud_list if pc is not None] - - if not valid_pointclouds: - # Return empty pointcloud if no valid pointclouds - return PointCloud2.from_numpy( - np.array([]).reshape(0, 3), frame_id="world", timestamp=time.time() - ) - - # Combine all point arrays - all_points = np.vstack([pc.as_numpy() for pc in valid_pointclouds]) - - # Use frame_id and timestamp from first pointcloud - combined_pointcloud = PointCloud2.from_numpy( - all_points, - frame_id=valid_pointclouds[0].frame_id, - timestamp=valid_pointclouds[0].ts, - ) - - return combined_pointcloud - - def hidden_point_removal( - self, camera_transform: Transform, pc: PointCloud2, radius: float = 100.0 - ): - camera_position = camera_transform.inverse().translation - camera_pos_np = camera_position.to_numpy().reshape(3, 1) - - pcd = pc.pointcloud - try: - _, visible_indices = pcd.hidden_point_removal(camera_pos_np, radius) - visible_pcd = pcd.select_by_index(visible_indices) - - return PointCloud2(visible_pcd, frame_id=pc.frame_id, ts=pc.ts) - except Exception as e: - return pc - - def cleanup_pointcloud(self, pc: PointCloud2) -> PointCloud2: - height = pc.filter_by_height(-0.05) - statistical, _ = height.pointcloud.remove_statistical_outlier( - nb_neighbors=20, std_ratio=2.0 - ) - return PointCloud2(statistical, pc.frame_id, pc.ts) + self.camera_info = camera_info def process_frame( self, - # these have to be timestamp aligned detections: ImageDetections2D, pointcloud: PointCloud2, transform: Transform, @@ -210,57 +55,48 @@ def process_frame( if not transform: return ImageDetections3D(detections.image, []) - pointcloud_list = self.filter_points_in_detections(detections, pointcloud, transform) - + print("3d projection", detections, pointcloud, transform) detection3d_list = [] - for detection, pc in zip(detections, pointcloud_list): - if pc is None: - continue - pc = self.hidden_point_removal(transform, self.cleanup_pointcloud(pc)) - if pc is None: - continue - - detection3d_list.append(detection.to_3d(pointcloud=pc, transform=transform)) + for detection 
in detections: + detection3d = Detection3D.from_2d( + detection, + world_pointcloud=pointcloud, + camera_info=self.camera_info, + world_to_optical_transform=transform, + ) + if detection3d is not None: + detection3d_list.append(detection3d) - return ImageDetections3D(detections.image, detection3d_list) + ret = ImageDetections3D(detections.image, detection3d_list) + print("3d projection finished", ret) + return ret @rpc def start(self): - time_tolerance = 5.0 # seconds + super().start() def detection2d_to_3d(args): detections, pc = args - transform = self.tf.get("camera_optical", "world", detections.image.ts, time_tolerance) + transform = self.tf.get("camera_optical", pc.frame_id, detections.image.ts, 5.0) return self.process_frame(detections, pc, transform) - combined_stream = self.detection_stream().pipe( - ops.with_latest_from(self.pointcloud.observable()), ops.map(detection2d_to_3d) - ) - - self.detection_stream().subscribe( - lambda det: self.detections.publish(det.to_ros_detection2d_array()) - ) + self.detection_stream_3d = align_timestamped( + backpressure(self.detection_stream_2d()), + self.pointcloud.observable(), + match_tolerance=0.25, + buffer_size=20.0, + ).pipe(ops.map(detection2d_to_3d)) - self.detection_stream().subscribe( - lambda det: self.annotations.publish(det.to_image_annotations()) - ) + # self.detection_stream_3d = backpressure(self.detection_stream_2d()).pipe( + # ops.with_latest_from(self.pointcloud.observable()), ops.map(detection2d_to_3d) + # ) - combined_stream.subscribe(self._handle_combined_detections) + self.detection_stream_3d.subscribe(self._publish_detections) - def _handle_combined_detections(self, detections: ImageDetections3D): + def _publish_detections(self, detections: ImageDetections3D): if not detections: return - print(detections) - - if len(detections) > 0: - self.detected_pointcloud_0.publish(detections[0].pointcloud) - self.detected_image_0.publish(detections[0].cropped_image()) - - if len(detections) > 1: - self.detected_pointcloud_1.publish(detections[1].pointcloud) - self.detected_image_1.publish(detections[1].cropped_image()) - - if len(detections) > 3: - self.detected_pointcloud_2.publish(detections[2].pointcloud) - self.detected_image_2.publish(detections[2].cropped_image()) + for index, detection in enumerate(detections[:3]): + pointcloud_topic = getattr(self, "detected_pointcloud_" + str(index)) + pointcloud_topic.publish(detection.pointcloud) diff --git a/dimos/perception/detection2d/moduleDB.py b/dimos/perception/detection2d/moduleDB.py index 8e9205c4f3..052b65d6c7 100644 --- a/dimos/perception/detection2d/moduleDB.py +++ b/dimos/perception/detection2d/moduleDB.py @@ -11,46 +11,353 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
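+"""ObjectDBModule: accumulates 3D detections into persistent Object3D tracks and exposes navigation and location-memory skills."""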
-import functools +import threading import time -from typing import List, Optional, Tuple +from copy import copy +from typing import Any, Callable, Dict, List, Optional -import numpy as np -from dimos_lcm.foxglove_msgs.ImageAnnotations import ( - ImageAnnotations, -) -from dimos_lcm.sensor_msgs import CameraInfo -from dimos_lcm.vision_msgs import Detection2D as ROSDetection2D -from reactivex import operators as ops +from dimos_lcm.foxglove_msgs.ImageAnnotations import ImageAnnotations +from lcm_msgs.foxglove_msgs import SceneUpdate +from reactivex.observable import Observable +from dimos.agents2 import Agent, Output, Reducer, Stream, skill from dimos.core import In, Out, rpc -from dimos.msgs.geometry_msgs import Transform +from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Vector3 from dimos.msgs.sensor_msgs import Image, PointCloud2 -from dimos.msgs.vision_msgs import Detection2DArray, Detection3DArray -from dimos.perception.detection2d.module2D import Detection2DModule +from dimos.msgs.vision_msgs import Detection2DArray from dimos.perception.detection2d.module3D import Detection3DModule -from dimos.perception.detection2d.type import ( - Detection2D, - Detection3D, - ImageDetections2D, - ImageDetections3D, -) -from dimos.protocol.skill import skill +from dimos.perception.detection2d.type import Detection3D, ImageDetections3D, TableStr +from dimos.protocol.skill.skill import skill +from dimos.protocol.skill.type import Output, Reducer, Stream +from dimos.types.timestamped import to_datetime -class DetectionDBModule(Detection3DModule): - @rpc - def start(self): - super().start() - self.pointcloud_stream().subscribe(self.add_detections) +# Represents an object in space, as collection of 3d detections over time +class Object3D(Detection3D): + best_detection: Detection3D = None + center: Vector3 = None + track_id: str = None + detections: List[Detection3D] + + def to_repr_dict(self) -> Dict[str, Any]: + return { + "object_id": self.track_id, + "detections": len(self.detections), + "center": "[" + ", ".join(list(map(lambda n: f"{n:1f}", self.center.to_list()))) + "]", + } + + def __init__(self, track_id: str, detection: Optional[Detection3D] = None, *args, **kwargs): + if detection is None: + self.detections = [] + return + self.ts = detection.ts + self.track_id = track_id + self.class_id = detection.class_id + self.name = detection.name + self.confidence = detection.confidence + self.pointcloud = detection.pointcloud + self.bbox = detection.bbox + self.transform = detection.transform + self.center = detection.center + self.frame_id = detection.frame_id + self.detections = [detection] + self.best_detection = detection + + def __add__(self, detection: Detection3D) -> "Object3D": + new_object = Object3D(self.track_id) + new_object.bbox = detection.bbox + new_object.confidence = max(self.confidence, detection.confidence) + new_object.ts = max(self.ts, detection.ts) + new_object.track_id = self.track_id + new_object.class_id = self.class_id + new_object.name = self.name + new_object.transform = self.transform + new_object.pointcloud = self.pointcloud + detection.pointcloud + new_object.frame_id = self.frame_id + new_object.center = (self.center + detection.center) / 2 + new_object.detections = self.detections + [detection] + + if detection.bbox_2d_volume() > self.bbox_2d_volume(): + new_object.best_detection = detection + else: + new_object.best_detection = self.best_detection + + return new_object + + @property + def image(self) -> Image: + return self.best_detection.image + 
+ def scene_entity_label(self) -> str: + return f"{self.name} ({len(self.detections)})" + + def agent_encode(self): + return { + "id": self.track_id, + "name": self.name, + "detections": len(self.detections), + "last_seen": f"{round((time.time() - self.ts))}s ago", + # "position": self.to_pose().position.agent_encode(), + } + + def to_pose(self) -> PoseStamped: + optical_inverse = Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=Quaternion(-0.5, 0.5, -0.5, 0.5), + frame_id="camera_link", + child_frame_id="camera_optical", + ).inverse() + + print("transform is", self.best_detection.transform) + + global_transform = optical_inverse + self.best_detection.transform + + print("inverse optical is", global_transform) + + print("obj center is", self.center) + global_pose = global_transform.to_pose() + print("Global pose:", global_pose) + global_pose.frame_id = self.best_detection.frame_id + print("remap to", self.best_detection.frame_id) + return PoseStamped( + position=self.center, orientation=Quaternion(), frame_id=self.best_detection.frame_id + ) - def add_detections(self, detections: List[Detection3DArray]): - for det in detections: - self.add_detection(det) + +class ObjectDBModule(Detection3DModule, TableStr): + cnt: int = 0 + objects: dict[str, Object3D] + object_stream: Observable[Object3D] = None + + goto: Callable[[PoseStamped], Any] = None + + image: In[Image] = None # type: ignore + pointcloud: In[PointCloud2] = None # type: ignore + + detections: Out[Detection2DArray] = None # type: ignore + annotations: Out[ImageAnnotations] = None # type: ignore + + detected_pointcloud_0: Out[PointCloud2] = None # type: ignore + detected_pointcloud_1: Out[PointCloud2] = None # type: ignore + detected_pointcloud_2: Out[PointCloud2] = None # type: ignore + + detected_image_0: Out[Image] = None # type: ignore + detected_image_1: Out[Image] = None # type: ignore + detected_image_2: Out[Image] = None # type: ignore + + scene_update: Out[SceneUpdate] = None # type: ignore + + target: Out[PoseStamped] = None # type: ignore + + remembered_locations: Dict[str, PoseStamped] + + def __init__(self, goto: Callable[[PoseStamped], Any], *args, **kwargs): + super().__init__(*args, **kwargs) + self.goto = goto + self.objects = {} + self.remembered_locations = {} + + def closest_object(self, detection: Detection3D) -> Optional[Object3D]: + # Filter objects to only those with matching names + matching_objects = [obj for obj in self.objects.values() if obj.name == detection.name] + + if not matching_objects: + return None + + # Sort by distance + distances = sorted(matching_objects, key=lambda obj: detection.center.distance(obj.center)) + + return distances[0] + + def add_detections(self, detections: List[Detection3D]) -> List[Object3D]: + return [ + detection for detection in map(self.add_detection, detections) if detection is not None + ] def add_detection(self, detection: Detection3D): - print("DETECTION", detection) + """Add detection to existing object or create new one.""" + closest = self.closest_object(detection) + if closest and closest.bounding_box_intersects(detection): + return self.add_to_object(closest, detection) + else: + return self.create_new_object(detection) + + def add_to_object(self, closest: Object3D, detection: Detection3D): + new_object = closest + detection + self.objects[closest.track_id] = new_object + return new_object + + def create_new_object(self, detection: Detection3D): + new_object = Object3D(f"obj_{self.cnt}", detection) + self.objects[new_object.track_id] = new_object + 
self.cnt += 1 + return new_object + + def agent_encode(self) -> str: + ret = [] + for obj in copy(self.objects).values(): + # we need at least 3 detections to consider it a valid object + # to make this robust we would need a ratio of detections within the observation window + # if len(obj.detections) < 3: + # continue + ret.append(str(obj.agent_encode())) + if not ret: + return "No objects detected yet." + return "\n".join(ret) + + def vlm_query(self, description: str) -> Optional[Object3D]: + imageDetections2D = super().vlm_query(description) + print("VLM query found", imageDetections2D, "detections") + time.sleep(3) + + if not imageDetections2D.detections: + return None + + ret = [] + for obj in self.objects.values(): + if obj.ts != imageDetections2D.ts: + print( + "Skipping", + obj.track_id, + "ts", + obj.ts, + "!=", + imageDetections2D.ts, + ) + continue + if obj.class_id != -100: + continue + if obj.name != imageDetections2D.detections[0].name: + print("Skipping", obj.name, "!=", imageDetections2D.detections[0].name) + continue + ret.append(obj) + ret.sort(key=lambda x: x.ts) + + return ret[0] if ret else None + + @skill() + def remember_location(self, name: str) -> str: + """Remember the current location with a name.""" + transform = self.tf.get("map", "sensor", time_point=time.time(), time_tolerance=1.0) + if not transform: + return "Could not get current location transform from map to sensor" + + pose = transform.to_pose() + pose.frame_id = "map" + self.remembered_locations[name] = pose + return f"Location '{name}' saved at position: {pose.position}" + + @skill() + def goto_remembered_location(self, name: str) -> str: + """Go to a remembered location by name.""" + pose = self.remembered_locations.get(name, None) + if not pose: + return f"Location {name} not found. Known locations: {list(self.remembered_locations.keys())}" + self.goto(pose) + return f"Navigating to remembered location {name} and pose {pose}" + + @skill() + def list_remembered_locations(self) -> str: + """List all remembered locations.""" + return str(list(self.remembered_locations.keys())) + + def nav_to(self, target_pose: PoseStamped) -> None: + target_pose.orientation = Quaternion(0.0, 0.0, 0.0, 0.0) + self.target.publish(target_pose) + time.sleep(0.1) + self.target.publish(target_pose) + self.goto(target_pose) + + @skill() + def navigate_to_object_in_view(self, query: str) -> str: + """Navigate to an object in the current image view via a natural-language query, using a vision-language model to find it.""" + target_obj = self.vlm_query(query) + if not target_obj: + return f"No objects found matching '{query}'" + return self.navigate_to_object_by_id(target_obj.track_id) + + @skill(reducer=Reducer.all) + def list_objects(self): + """List all detected objects that the system remembers and can navigate to.""" + data = self.agent_encode() + return data + + @skill() + def navigate_to_object_by_id(self, object_id: str): + """Navigate to an object by its object id.""" + target_obj = self.objects.get(object_id, None) + if not target_obj: + return f"Object {object_id} not found\nHere are the known objects:\n{str(self.agent_encode())}" + target_pose = target_obj.to_pose() + target_pose.frame_id = "map" + self.target.publish(target_pose) + time.sleep(0.1) + self.target.publish(target_pose) + self.nav_to(target_pose) + return f"Navigating to {object_id} ({target_obj.name})" def lookup(self, label: str) -> List[Detection3D]: """Look up a detection by label.""" return [] + + @rpc + def start(self): + super().start() + + def update_objects(imageDetections: ImageDetections3D): + for detection in imageDetections.detections: + # print(detection) + self.add_detection(detection) + + def scene_thread(): + while True: + scene_update = self.to_foxglove_scene_update() + self.scene_update.publish(scene_update) + time.sleep(1.0) + + threading.Thread(target=scene_thread, daemon=True).start() + + self.detection_stream_3d.subscribe(update_objects) + + def goto_object(self, object_id: str) -> Optional[Object3D]: + """Go to object by id.""" + return self.objects.get(object_id, None) + + def to_foxglove_scene_update(self) -> "SceneUpdate": + """Convert all detections to a Foxglove SceneUpdate message. 
+ + Returns: + SceneUpdate containing SceneEntity objects for all detections + """ + + # Create SceneUpdate message with all detections + scene_update = SceneUpdate() + scene_update.deletions_length = 0 + scene_update.deletions = [] + scene_update.entities = [] + + for obj in copy(self.objects).values(): + # we need at least 3 detections to consider it a valid object + # to make this robust we would need a ratio of detections within the observation window + if obj.class_id != -100 and len(obj.detections) < 3: + continue + + # print( + # f"Object {obj.track_id}: {len(obj.detections)} detections, confidence {obj.confidence}" + # ) + # print(obj.to_pose()) + + scene_update.entities.append( + obj.to_foxglove_scene_entity( + entity_id=f"object_{obj.name}_{obj.track_id}_{len(obj.detections)}" + ) + ) + + scene_update.entities_length = len(scene_update.entities) + return scene_update + + def __len__(self): + return len(self.objects) + + def __iter__(self): + return iter(self.objects.values()) diff --git a/dimos/perception/detection2d/test_module.py b/dimos/perception/detection2d/test_module.py deleted file mode 100644 index 09ce3d1769..0000000000 --- a/dimos/perception/detection2d/test_module.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import pytest -from dimos_lcm.foxglove_msgs.ImageAnnotations import ( - ImageAnnotations, -) -from dimos_lcm.sensor_msgs import Image, PointCloud2 - -from dimos.core import LCMTransport -from dimos.msgs.geometry_msgs import PoseStamped, Transform, Vector3 -from dimos.msgs.nav_msgs import OccupancyGrid -from dimos.msgs.sensor_msgs import PointCloud2 as PointCloud2Msg -from dimos.msgs.vision_msgs import Detection2DArray -from dimos.perception.detection2d.conftest import Moment -from dimos.perception.detection2d.module2D import Detection2DModule -from dimos.perception.detection2d.module3D import Detection3DModule -from dimos.perception.detection2d.type import ( - Detection2D, - Detection3D, - ImageDetections2D, - ImageDetections3D, -) -from dimos.robot.unitree_webrtc.modular import deploy_connection, deploy_navigation -from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule -from dimos.robot.unitree_webrtc.type.lidar import LidarMessage -from dimos.robot.unitree_webrtc.type.map import Map - - -def test_module2d(moment: Moment, publish_lcm): - module = Detection2DModule() - detections2d = module.process_image_frame(moment["image_frame"]) - - print(detections2d) - - # Assertions for test_module2d - assert isinstance(detections2d, ImageDetections2D) - assert len(detections2d) == 1 - assert detections2d.image.ts == pytest.approx(1757960670.490248) - assert detections2d.image.shape == (720, 1280, 3) - assert detections2d.image.frame_id == "camera_optical" - - # Check first detection - det = detections2d.detections[0] - assert isinstance(det, Detection2D) - assert det.name == "suitcase" - assert det.class_id == 28 - assert det.track_id == 1 - assert det.confidence == pytest.approx(0.8145349025726318) - - # Check bbox values - assert det.bbox == pytest.approx( - [503.437255859375, 249.89385986328125, 655.950439453125, 469.82879638671875] - ) - - annotations = detections2d.to_image_annotations() - publish_lcm({"annotations": annotations, **moment}) - - module._close_module() - - -def test_module3d(moment: Moment, publish_lcm): - module2d = Detection2DModule() - detections2d = module2d.process_image_frame(moment["image_frame"]) - pointcloud = moment["lidar_frame"] - camera_transform = moment["tf"].get("camera_optical", "world") - if camera_transform is None: - raise ValueError("No camera_optical transform in tf") - annotations = detections2d.to_image_annotations() - - module3d = Detection3DModule(camera_info=moment["camera_info"]) - detections3d = module3d.process_frame(detections2d, pointcloud, camera_transform) - - publish_lcm( - { - **moment, - "annotations": annotations, - "detections": detections3d, - } - ) - - print(detections3d) - - # Assertions for test_module3d - assert isinstance(detections3d, ImageDetections3D) - assert len(detections3d) == 1 - assert detections3d.image.ts == pytest.approx(1757960670.490248) - assert detections3d.image.shape == (720, 1280, 3) - assert detections3d.image.frame_id == "camera_optical" - - # Check first 3D detection - det = detections3d.detections[0] - assert isinstance(det, Detection3D) - assert det.name == "suitcase" - assert det.class_id == 28 - assert det.track_id == 1 - - assert det.confidence == pytest.approx(0.8145349025726318) - - # Check bbox values (should match 2D) - assert det.bbox == pytest.approx( - [503.437255859375, 249.89385986328125, 655.950439453125, 469.82879638671875] - ) - - # 3D-specific assertions - assert isinstance(det.pointcloud, PointCloud2Msg) - assert len(det.pointcloud) == 81 - assert 
det.pointcloud.frame_id == "world" - assert isinstance(det.transform, Transform) - - # Check center - center = det.center - assert isinstance(center, Vector3) - # Values from output: Vector([ -3.3565 -0.26265 0.18549]) - assert center.x == pytest.approx(-3.3565, abs=1e-4) - assert center.y == pytest.approx(-0.26265, abs=1e-4) - assert center.z == pytest.approx(0.18549, abs=1e-4) - - # Check pose - pose = det.pose - assert isinstance(pose, PoseStamped) - assert pose.frame_id == "world" - assert pose.ts == det.ts - - # Check repr dict values - repr_dict = det.to_repr_dict() - assert repr_dict["dist"] == "0.88m" - assert repr_dict["points"] == "81" - - module2d._close_module() - module3d._close_module() - - -@pytest.mark.tool -def test_module3d_replay(dimos_cluster): - connection = deploy_connection(dimos_cluster, loop=False, speed=1.0) - # mapper = deploy_navigation(dimos_cluster, connection) - mapper = dimos_cluster.deploy( - Map, voxel_size=0.5, cost_resolution=0.05, global_publish_interval=1.0 - ) - mapper.lidar.connect(connection.lidar) - mapper.global_map.transport = LCMTransport("/global_map", LidarMessage) - mapper.global_costmap.transport = LCMTransport("/global_costmap", OccupancyGrid) - mapper.local_costmap.transport = LCMTransport("/local_costmap", OccupancyGrid) - - mapper.start() - - module3D = dimos_cluster.deploy(Detection3DModule, camera_info=ConnectionModule._camera_info()) - - module3D.image.connect(connection.video) - module3D.pointcloud.connect(connection.lidar) - - module3D.annotations.transport = LCMTransport("/annotations", ImageAnnotations) - module3D.detections.transport = LCMTransport("/detections", Detection2DArray) - - module3D.detected_pointcloud_0.transport = LCMTransport("/detected/pointcloud/0", PointCloud2) - module3D.detected_pointcloud_1.transport = LCMTransport("/detected/pointcloud/1", PointCloud2) - module3D.detected_pointcloud_2.transport = LCMTransport("/detected/pointcloud/2", PointCloud2) - - module3D.detected_image_0.transport = LCMTransport("/detected/image/0", Image) - module3D.detected_image_1.transport = LCMTransport("/detected/image/1", Image) - module3D.detected_image_2.transport = LCMTransport("/detected/image/2", Image) - - module3D.start() - connection.start() - import time - - while True: - time.sleep(1) diff --git a/dimos/perception/detection2d/test_moduleDB.py b/dimos/perception/detection2d/test_moduleDB.py new file mode 100644 index 0000000000..a3a1b003fd --- /dev/null +++ b/dimos/perception/detection2d/test_moduleDB.py @@ -0,0 +1,62 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
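In the test below, the `goto` callback handed to ObjectDBModule only prints the object id. An illustrative callback that actually navigates could forward the stored object's pose to the NavigationModule introduced later in this changeset; direct access to the DB's `objects` dict and the `nav` module handle are assumptions made only for this sketch:

def goto(obj_id: str) -> bool:
    obj = moduleDB.objects.get(obj_id)  # assumes local access to the objects dict
    if obj is None:
        return False
    return nav.go_to(obj.best_detection.pose, timeout=60.0)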
+import time + +import pytest +from lcm_msgs.foxglove_msgs import SceneUpdate + +from dimos.core import LCMTransport +from dimos.msgs.foxglove_msgs import ImageAnnotations +from dimos.msgs.geometry_msgs import PoseStamped +from dimos.msgs.sensor_msgs import Image, PointCloud2 +from dimos.msgs.vision_msgs import Detection2DArray +from dimos.perception.detection2d.moduleDB import ObjectDBModule +from dimos.protocol.service import lcmservice as lcm +from dimos.robot.unitree_webrtc.modular import deploy_connection, deploy_navigation +from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule + + +@pytest.mark.module +def test_moduleDB(dimos_cluster): + connection = deploy_connection(dimos_cluster) + + moduleDB = dimos_cluster.deploy( + ObjectDBModule, + camera_info=ConnectionModule._camera_info(), + goto=lambda obj_id: print(f"Going to {obj_id}"), + ) + moduleDB.image.connect(connection.video) + moduleDB.pointcloud.connect(connection.lidar) + + moduleDB.annotations.transport = LCMTransport("/annotations", ImageAnnotations) + moduleDB.detections.transport = LCMTransport("/detections", Detection2DArray) + + moduleDB.detected_pointcloud_0.transport = LCMTransport("/detected/pointcloud/0", PointCloud2) + moduleDB.detected_pointcloud_1.transport = LCMTransport("/detected/pointcloud/1", PointCloud2) + moduleDB.detected_pointcloud_2.transport = LCMTransport("/detected/pointcloud/2", PointCloud2) + + moduleDB.detected_image_0.transport = LCMTransport("/detected/image/0", Image) + moduleDB.detected_image_1.transport = LCMTransport("/detected/image/1", Image) + moduleDB.detected_image_2.transport = LCMTransport("/detected/image/2", Image) + + moduleDB.scene_update.transport = LCMTransport("/scene_update", SceneUpdate) + moduleDB.target.transport = LCMTransport("/target", PoseStamped) + + connection.start() + moduleDB.start() + + time.sleep(4) + print("STARTING QUERY!!") + print("VLM RES", moduleDB.navigate_to_object_in_view("white floor")) + time.sleep(30) diff --git a/dimos/perception/detection2d/test_type.py b/dimos/perception/detection2d/test_type.py deleted file mode 100644 index 25264ef727..0000000000 --- a/dimos/perception/detection2d/test_type.py +++ /dev/null @@ -1,239 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from dimos.perception.detection2d.conftest import detections2d, detections3d -from dimos.perception.detection2d.type import ( - Detection2D, - Detection3D, - ImageDetections2D, - ImageDetections3D, -) -from dimos.msgs.sensor_msgs import PointCloud2 -from dimos.msgs.geometry_msgs import Transform, Vector3, PoseStamped - - -def test_detections2d(detections2d): - print(f"\n=== ImageDetections2D Test ===") - print(f"Type: {type(detections2d)}") - print(f"Number of detections: {len(detections2d)}") - print(f"Image timestamp: {detections2d.image.ts}") - print(f"Image shape: {detections2d.image.shape}") - print(f"Image frame_id: {detections2d.image.frame_id}") - - print(f"\nFull detections object:") - print(detections2d) - - # Basic type assertions - assert isinstance(detections2d, ImageDetections2D) - assert hasattr(detections2d, "image") - assert hasattr(detections2d, "detections") - - # Image assertions - assert detections2d.image is not None - assert detections2d.image.ts == 1757960670.490248 - assert detections2d.image.shape == (720, 1280, 3) - assert detections2d.image.frame_id == "camera_optical" - - # Detection count assertions - assert len(detections2d) == 1 - assert isinstance(detections2d.detections, list) - assert len(detections2d.detections) == 1 - - # Test first detection with literal checks - det = detections2d.detections[0] - print(f"\n--- Detection 0 (literal checks) ---") - print(f"Type: {type(det)}") - print(f"Name: {det.name}") - print(f"Class ID: {det.class_id}") - print(f"Track ID: {det.track_id}") - print(f"Confidence: {det.confidence}") - print(f"Bbox: {det.bbox}") - print(f"Timestamp: {det.ts}") - - # Detection type assertions - assert isinstance(det, Detection2D) - - # Literal value assertions - assert det.name == "suitcase" - assert det.class_id == 28 # COCO class 28 is suitcase - assert det.track_id == 1 - assert 0.814 < det.confidence < 0.815 # Allow small floating point variance - - # Data type assertions - assert isinstance(det.name, str) - assert isinstance(det.class_id, int) - assert isinstance(det.track_id, int) - assert isinstance(det.confidence, float) - assert isinstance(det.bbox, (tuple, list)) and len(det.bbox) == 4 - assert isinstance(det.ts, float) - - # Bbox literal checks (with tolerance for float precision) - x1, y1, x2, y2 = det.bbox - assert 503.4 < x1 < 503.5 - assert 249.8 < y1 < 250.0 - assert 655.9 < x2 < 656.0 - assert 469.8 < y2 < 470.0 - - # Bbox format assertions - assert all(isinstance(coord, (int, float)) for coord in det.bbox) - assert x2 > x1, f"x2 ({x2}) should be greater than x1 ({x1})" - assert y2 > y1, f"y2 ({y2}) should be greater than y1 ({y1})" - assert x1 >= 0 and y1 >= 0, "Bbox coordinates should be non-negative" - - # Bbox width/height checks - width = x2 - x1 - height = y2 - y1 - assert 152.0 < width < 153.0 # Expected width ~152.5 - assert 219.0 < height < 221.0 # Expected height ~219.9 - - # Confidence assertions - assert 0.0 <= det.confidence <= 1.0, ( - f"Confidence should be between 0 and 1, got {det.confidence}" - ) - - # Image reference assertion - assert det.image is detections2d.image, "Detection should reference the same image" - - # Timestamp consistency - assert det.ts == detections2d.image.ts - assert det.ts == 1757960670.490248 - - -def test_detections3d(detections3d): - print(f"\n=== ImageDetections3D Test ===") - print(f"Type: {type(detections3d)}") - print(f"Number of detections: {len(detections3d)}") - print(f"Image timestamp: {detections3d.image.ts}") - print(f"Image shape: {detections3d.image.shape}") - 
print(f"Image frame_id: {detections3d.image.frame_id}") - - print(f"\nFull detections object:") - print(detections3d) - - # Basic type assertions - assert isinstance(detections3d, ImageDetections3D) - assert hasattr(detections3d, "image") - assert hasattr(detections3d, "detections") - - # Image assertions - assert detections3d.image is not None - assert detections3d.image.ts == 1757960670.490248 - assert detections3d.image.shape == (720, 1280, 3) - assert detections3d.image.frame_id == "camera_optical" - - # Detection count assertions - assert len(detections3d) == 1 - assert isinstance(detections3d.detections, list) - assert len(detections3d.detections) == 1 - - # Test first 3D detection with literal checks - det = detections3d.detections[0] - print(f"\n--- Detection3D 0 (literal checks) ---") - print(f"Type: {type(det)}") - print(f"Name: {det.name}") - print(f"Class ID: {det.class_id}") - print(f"Track ID: {det.track_id}") - print(f"Confidence: {det.confidence}") - print(f"Bbox: {det.bbox}") - print(f"Timestamp: {det.ts}") - print(f"Has pointcloud: {hasattr(det, 'pointcloud')}") - print(f"Has transform: {hasattr(det, 'transform')}") - if hasattr(det, "pointcloud"): - print(f"Pointcloud points: {len(det.pointcloud)}") - print(f"Pointcloud frame_id: {det.pointcloud.frame_id}") - - # Detection type assertions - assert isinstance(det, Detection3D) - - # Detection3D should have all Detection2D fields plus pointcloud and transform - assert hasattr(det, "bbox") - assert hasattr(det, "track_id") - assert hasattr(det, "class_id") - assert hasattr(det, "confidence") - assert hasattr(det, "name") - assert hasattr(det, "ts") - assert hasattr(det, "image") - assert hasattr(det, "pointcloud") - assert hasattr(det, "transform") - - # Literal value assertions (should match Detection2D) - assert det.name == "suitcase" - assert det.class_id == 28 # COCO class 28 is suitcase - assert det.track_id == 1 - assert 0.814 < det.confidence < 0.815 # Allow small floating point variance - - # Data type assertions - assert isinstance(det.name, str) - assert isinstance(det.class_id, int) - assert isinstance(det.track_id, int) - assert isinstance(det.confidence, float) - assert isinstance(det.bbox, (tuple, list)) and len(det.bbox) == 4 - assert isinstance(det.ts, float) - - # Bbox literal checks (should match Detection2D) - x1, y1, x2, y2 = det.bbox - assert 503.4 < x1 < 503.5 - assert 249.8 < y1 < 250.0 - assert 655.9 < x2 < 656.0 - assert 469.8 < y2 < 470.0 - - # 3D-specific assertions - assert isinstance(det.pointcloud, PointCloud2) - assert isinstance(det.transform, Transform) - - # Pointcloud assertions - assert len(det.pointcloud) == 81 # Based on the output we saw - assert det.pointcloud.frame_id == "world" # Pointcloud should be in world frame - - # Test center calculation - center = det.center - print(f"\nDetection center: {center}") - assert isinstance(center, Vector3) - assert hasattr(center, "x") - assert hasattr(center, "y") - assert hasattr(center, "z") - - # Test pose property - pose = det.pose - print(f"Detection pose: {pose}") - assert isinstance(pose, PoseStamped) - assert pose.frame_id == "world" - assert pose.ts == det.ts - assert pose.position == center # Pose position should match center - - # Check distance calculation (from to_repr_dict) - repr_dict = det.to_repr_dict() - print(f"\nRepr dict: {repr_dict}") - assert "dist" in repr_dict - assert repr_dict["dist"] == "0.88m" # Based on the output - assert repr_dict["points"] == "81" - assert repr_dict["name"] == "suitcase" - assert 
repr_dict["class"] == "28" - assert repr_dict["track"] == "1" - - # Image reference assertion - assert det.image is detections3d.image, "Detection should reference the same image" - - # Timestamp consistency - assert det.ts == detections3d.image.ts - assert det.ts == 1757960670.490248 - - -def test_detection3d_to_pose(detections3d): - det = detections3d[0] - pose = det.pose - - # Check that pose is valid - assert pose.frame_id == "world" - assert pose.ts == det.ts diff --git a/dimos/perception/detection2d/type/__init__.py b/dimos/perception/detection2d/type/__init__.py new file mode 100644 index 0000000000..fb7c435c0c --- /dev/null +++ b/dimos/perception/detection2d/type/__init__.py @@ -0,0 +1,7 @@ +from dimos.perception.detection2d.type.detection2d import ( + Detection2D, + ImageDetections2D, + InconvinientDetectionFormat, +) +from dimos.perception.detection2d.type.detection3d import Detection3D, ImageDetections3D +from dimos.perception.detection2d.type.imageDetections import ImageDetections, TableStr diff --git a/dimos/perception/detection2d/type.py b/dimos/perception/detection2d/type/detection2d.py similarity index 61% rename from dimos/perception/detection2d/type.py rename to dimos/perception/detection2d/type/detection2d.py index b37828e8e8..5bbedfb6ae 100644 --- a/dimos/perception/detection2d/type.py +++ b/dimos/perception/detection2d/type/detection2d.py @@ -14,13 +14,10 @@ from __future__ import annotations -import functools import hashlib from dataclasses import dataclass -from typing import Any, Dict, Generic, List, Optional, Tuple, TypeVar +from typing import Any, Dict, List, Tuple -import numpy as np -from dimos_lcm.foxglove_msgs.Color import Color from dimos_lcm.foxglove_msgs.ImageAnnotations import ( PointsAnnotation, TextAnnotation, @@ -37,14 +34,13 @@ Detection2D as ROSDetection2D, ) from rich.console import Console -from rich.table import Table from rich.text import Text from dimos.msgs.foxglove_msgs import ImageAnnotations -from dimos.msgs.geometry_msgs import PoseStamped, Transform, Vector3 -from dimos.msgs.sensor_msgs import Image, PointCloud2 +from dimos.msgs.foxglove_msgs.Color import Color +from dimos.msgs.sensor_msgs import Image from dimos.msgs.std_msgs import Header -from dimos.msgs.vision_msgs import Detection2DArray +from dimos.perception.detection2d.type.imageDetections import ImageDetections from dimos.types.timestamped import Timestamped, to_ros_stamp, to_timestamp Bbox = Tuple[float, float, float, float] @@ -56,6 +52,32 @@ Detections = List[Detection] +def _hash_to_color(name: str) -> str: + """Generate a consistent color for a given name using hash.""" + # List of rich colors to choose from + colors = [ + "cyan", + "magenta", + "yellow", + "blue", + "green", + "red", + "bright_cyan", + "bright_magenta", + "bright_yellow", + "bright_blue", + "bright_green", + "bright_red", + "purple", + "white", + "pink", + ] + + # Hash the name and pick a color + hash_value = hashlib.md5(name.encode()).digest()[0] + return colors[hash_value % len(colors)] + + # yolo and detic have bad formats this translates into list of detections def better_detection_format(inconvinient_detections: InconvinientDetectionFormat) -> Detections: bboxes, track_ids, class_ids, confidences, names = inconvinient_detections @@ -84,13 +106,10 @@ def to_repr_dict(self) -> Dict[str, Any]: "name": self.name, "class": str(self.class_id), "track": str(self.track_id), - "conf": self.confidence, + "conf": f"{self.confidence:.2f}", "bbox": f"[{x1:.0f},{y1:.0f},{x2:.0f},{y2:.0f}]", } - def to_image(self) -> 
Image: - return self.image - # return focused image, only on the bbox def cropped_image(self, padding: int = 20) -> Image: """Return a cropped version of the image focused on the bounding box. @@ -110,26 +129,18 @@ def __str__(self): console = Console(force_terminal=True, legacy_windows=False) d = self.to_repr_dict() - # Create confidence text with color based on value - conf_color = "green" if d["conf"] > 0.8 else "yellow" if d["conf"] > 0.5 else "red" - conf_text = Text(f"{d['conf']:.1%}", style=conf_color) - # Build the string representation parts = [ Text(f"{self.__class__.__name__}("), - Text(d["name"], style="bold cyan"), - Text(f" cls={d['class']} trk={d['track']} "), - conf_text, - Text(f" {d['bbox']}"), ] # Add any extra fields (e.g., points for Detection3D) - extra_keys = [k for k in d.keys() if k not in ["name", "class", "track", "conf", "bbox"]] + extra_keys = [k for k in d.keys() if k not in ["class"]] for key in extra_keys: if d[key] == "None": - parts.append(Text(f" {key}={d[key]}", style="dim")) + parts.append(Text(f"{key}={d[key]}", style="dim")) else: - parts.append(Text(f" {key}={d[key]}", style="blue")) + parts.append(Text(f"{key}={d[key]}", style=_hash_to_color(key))) parts.append(Text(")")) @@ -138,6 +149,12 @@ def __str__(self): console.print(*parts, end="") return capture.get().strip() + def bbox_2d_volume(self) -> float: + x1, y1, x2, y2 = self.bbox + width = max(0.0, x2 - x1) + height = max(0.0, y2 - y1) + return width * height + @classmethod def from_detector( cls, raw_detections: InconvinientDetectionFormat, **kwargs @@ -179,7 +196,7 @@ def to_ros_bbox(self) -> BoundingBox2D: ) def lcm_encode(self): - return self.to_imageannotations().lcm_encode() + return self.to_image_annotations().lcm_encode() def to_text_annotation(self) -> List[TextAnnotation]: x1, y1, x2, y2 = self.bbox @@ -198,7 +215,7 @@ def to_text_annotation(self) -> List[TextAnnotation]: TextAnnotation( timestamp=to_ros_stamp(self.ts), position=Point2(x=x1, y=y1), - text=f"{self.name}_{self.class_id}_{self.track_id}", + text=f"{self.name}_{self.track_id}", font_size=font_size, text_color=Color(r=1.0, g=1.0, b=1.0, a=1), background_color=Color(r=0, g=0, b=0, a=1), @@ -214,7 +231,7 @@ def to_points_annotation(self) -> List[PointsAnnotation]: PointsAnnotation( timestamp=to_ros_stamp(self.ts), outline_color=Color(r=0.0, g=0.0, b=0.0, a=1.0), - fill_color=Color(r=1.0, g=0.0, b=0.0, a=0.15), + fill_color=Color.from_string(self.name, alpha=0.15), thickness=thickness, points_length=4, points=[ @@ -230,7 +247,7 @@ def to_points_annotation(self) -> List[PointsAnnotation]: # this is almost never called directly since this is a single detection # and ImageAnnotations message normally contains multiple detections annotations # so ImageDetections2D and ImageDetections3D normally implements this for whole image - def to_annotations(self) -> ImageAnnotations: + def to_image_annotations(self) -> ImageAnnotations: points = self.to_points_annotation() texts = self.to_text_annotation() @@ -300,179 +317,6 @@ def to_ros_detection2d(self) -> ROSDetection2D: id=str(self.track_id), ) - def to_3d(self, **kwargs) -> "Detection3D": - return Detection3D( - image=self.image, - bbox=self.bbox, - track_id=self.track_id, - class_id=self.class_id, - confidence=self.confidence, - name=self.name, - ts=self.ts, - **kwargs, - ) - - -@dataclass -class Detection3D(Detection2D): - pointcloud: PointCloud2 - transform: Transform - - def localize(self, pointcloud: PointCloud2) -> Detection3D: - self.pointcloud = pointcloud - return self - - 
@functools.cached_property - def center(self) -> Vector3: - """Calculate the center of the pointcloud in world frame.""" - points = np.asarray(self.pointcloud.pointcloud.points) - center = points.mean(axis=0) - return Vector3(*center) - - @functools.cached_property - def pose(self) -> PoseStamped: - """Convert detection to a PoseStamped using pointcloud center. - - Returns pose in world frame with identity rotation. - The pointcloud is already in world frame. - """ - return PoseStamped( - ts=self.ts, - frame_id="world", - position=self.center, - orientation=(0.0, 0.0, 0.0, 1.0), # Identity quaternion - ) - - def to_repr_dict(self) -> Dict[str, Any]: - d = super().to_repr_dict() - - # Add pointcloud info - d["points"] = str(len(self.pointcloud)) - - # Calculate distance from camera - # The pointcloud is in world frame, and transform gives camera position in world - center_world = self.center - # Camera position in world frame is the translation part of the transform - camera_pos = self.transform.translation - # Use Vector3 subtraction and magnitude - distance = (center_world - camera_pos).magnitude() - d["dist"] = f"{distance:.2f}m" - - return d - - -T = TypeVar("T", bound="Detection2D") - - -def _hash_to_color(name: str) -> str: - """Generate a consistent color for a given name using hash.""" - # List of rich colors to choose from - colors = [ - "cyan", - "magenta", - "yellow", - "blue", - "green", - "red", - "bright_cyan", - "bright_magenta", - "bright_yellow", - "bright_blue", - "bright_green", - "bright_red", - "purple", - "white", - "pink", - ] - - # Hash the name and pick a color - hash_value = hashlib.md5(name.encode()).digest()[0] - return colors[hash_value % len(colors)] - - -class ImageDetections(Generic[T]): - image: Image - detections: List[T] - - def __init__(self, image: Image, detections: List[T]): - self.image = image - self.detections = detections - for det in self.detections: - if not det.ts: - det.ts = image.ts - - def __str__(self): - console = Console(force_terminal=True, legacy_windows=False) - - # Dynamically build columns based on the first detection's dict keys - if not self.detections: - return "Empty ImageDetections" - - # Create a table for detections - table = Table( - title=f"{self.__class__.__name__} [{len(self.detections)} detections @ {to_timestamp(self.image.ts):.3f}]", - show_header=True, - show_edge=True, - ) - - # Cache all repr_dicts to avoid double computation - detection_dicts = [det.to_repr_dict() for det in self.detections] - - first_dict = detection_dicts[0] - table.add_column("#", style="dim") - for col in first_dict.keys(): - color = _hash_to_color(col) - table.add_column(col.title(), style=color) - - # Add each detection to the table - for i, d in enumerate(detection_dicts): - row = [str(i)] - - for key in first_dict.keys(): - if key == "conf": - # Color-code confidence - conf_color = "green" if d[key] > 0.8 else "yellow" if d[key] > 0.5 else "red" - row.append(Text(f"{d[key]:.1%}", style=conf_color)) - elif key == "points" and d.get(key) == "None": - row.append(Text(d.get(key, ""), style="dim")) - else: - row.append(str(d.get(key, ""))) - table.add_row(*row) - - with console.capture() as capture: - console.print(table) - return capture.get().strip() - - def __len__(self): - return len(self.detections) - - def __iter__(self): - return iter(self.detections) - - def __getitem__(self, index): - return self.detections[index] - - def to_ros_detection2d_array(self) -> Detection2DArray: - return Detection2DArray( - 
detections_length=len(self.detections), - header=Header(self.image.ts, "camera_optical"), - detections=[det.to_ros_detection2d() for det in self.detections], - ) - - def to_image_annotations(self) -> ImageAnnotations: - def flatten(xss): - return [x for xs in xss for x in xs] - - texts = flatten(det.to_text_annotation() for det in self.detections) - points = flatten(det.to_points_annotation() for det in self.detections) - - return ImageAnnotations( - texts=texts, - texts_length=len(texts), - points=points, - points_length=len(points), - ) - class ImageDetections2D(ImageDetections[Detection2D]): @classmethod @@ -483,9 +327,3 @@ def from_detector( image=image, detections=Detection2D.from_detector(raw_detections, image=image, ts=image.ts), ) - - -class ImageDetections3D(ImageDetections[Detection3D]): - """Specialized class for 3D detections in an image.""" - - ... diff --git a/dimos/perception/detection2d/type/detection3d.py b/dimos/perception/detection2d/type/detection3d.py new file mode 100644 index 0000000000..8c0919b700 --- /dev/null +++ b/dimos/perception/detection2d/type/detection3d.py @@ -0,0 +1,403 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import functools +from dataclasses import dataclass +from typing import Any, Callable, Dict, Optional, TypeVar + +import numpy as np +from dimos_lcm.sensor_msgs import CameraInfo +from lcm_msgs.builtin_interfaces import Duration +from lcm_msgs.foxglove_msgs import CubePrimitive, SceneEntity, SceneUpdate, TextPrimitive +from lcm_msgs.geometry_msgs import Point, Pose, Quaternion +from lcm_msgs.geometry_msgs import Vector3 as LCMVector3 + +from dimos.msgs.foxglove_msgs.Color import Color +from dimos.msgs.geometry_msgs import PoseStamped, Transform, Vector3 +from dimos.msgs.sensor_msgs import PointCloud2 +from dimos.perception.detection2d.type.detection2d import Detection2D +from dimos.perception.detection2d.type.imageDetections import ImageDetections +from dimos.types.timestamped import to_ros_stamp + +Detection3DFilter = Callable[ + [Detection2D, PointCloud2, CameraInfo, Transform], Optional["Detection3D"] +] + + +def height_filter(height=0.1) -> Detection3DFilter: + return lambda det, pc, ci, tf: pc.filter_by_height(height) + + +def statistical(nb_neighbors=40, std_ratio=0.5) -> Detection3DFilter: + def filter_func( + det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform + ) -> Optional[PointCloud2]: + try: + statistical, removed = pc.pointcloud.remove_statistical_outlier( + nb_neighbors=nb_neighbors, std_ratio=std_ratio + ) + return PointCloud2(statistical, pc.frame_id, pc.ts) + except Exception as e: + # print("statistical filter failed:", e) + return None + + return filter_func + + +def raycast() -> Detection3DFilter: + def filter_func( + det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform + ) -> Optional[PointCloud2]: + try: + camera_pos = tf.inverse().translation + camera_pos_np = camera_pos.to_numpy() + _, visible_indices = 
pc.pointcloud.hidden_point_removal(camera_pos_np, radius=100.0)
+            visible_pcd = pc.pointcloud.select_by_index(visible_indices)
+            return PointCloud2(visible_pcd, pc.frame_id, pc.ts)
+        except Exception as e:
+            # print("raycast filter failed:", e)
+            return None
+
+    return filter_func
+
+
+def radius_outlier(min_neighbors: int = 20, radius: float = 0.3) -> Detection3DFilter:
+    """
+    Remove isolated points: keep only points that have at least `min_neighbors`
+    neighbors within `radius` meters (same units as your point cloud).
+    """
+
+    def filter_func(
+        det: Detection2D, pc: PointCloud2, ci: CameraInfo, tf: Transform
+    ) -> Optional[PointCloud2]:
+        filtered_pcd, removed = pc.pointcloud.remove_radius_outlier(
+            nb_points=min_neighbors, radius=radius
+        )
+        return PointCloud2(filtered_pcd, pc.frame_id, pc.ts)
+
+    return filter_func
+
+
+@dataclass
+class Detection3D(Detection2D):
+    pointcloud: PointCloud2
+    transform: Transform
+    frame_id: str = "unknown"
+
+    @classmethod
+    def from_2d(
+        cls,
+        det: Detection2D,
+        world_pointcloud: PointCloud2,
+        camera_info: CameraInfo,
+        world_to_optical_transform: Transform,
+        # filters should be adjusted to the sensor noise characteristics when feeding
+        # sensor data directly
+        filters: list[Detection3DFilter] = [
+            # height_filter(0.1),
+            raycast(),
+            radius_outlier(),
+            statistical(),
+        ],
+    ) -> Optional["Detection3D"]:
+        """Create a Detection3D from a 2D detection by projecting world pointcloud.
+
+        This method handles:
+        1. Projecting world pointcloud to camera frame
+        2. Filtering points within the 2D detection bounding box
+        3. Cleaning up the pointcloud (height filter, outlier removal)
+        4. Hidden point removal from camera perspective
+
+        Args:
+            det: The 2D detection
+            world_pointcloud: Full pointcloud in world frame
+            camera_info: Camera calibration info
+            world_to_optical_transform: Transform from world to camera optical frame
+            filters: List of functions to apply to the pointcloud for filtering
+        Returns:
+            Detection3D with filtered pointcloud, or None if no valid points
+        """
+        # Extract camera parameters
+        fx, fy = camera_info.K[0], camera_info.K[4]
+        cx, cy = camera_info.K[2], camera_info.K[5]
+        image_width = camera_info.width
+        image_height = camera_info.height
+
+        camera_matrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+
+        # Convert pointcloud to numpy array
+        world_points = world_pointcloud.as_numpy()
+
+        # Project points to camera frame
+        points_homogeneous = np.hstack([world_points, np.ones((world_points.shape[0], 1))])
+        extrinsics_matrix = world_to_optical_transform.to_matrix()
+        points_camera = (extrinsics_matrix @ points_homogeneous.T).T
+
+        # Filter out points behind the camera
+        valid_mask = points_camera[:, 2] > 0
+        points_camera = points_camera[valid_mask]
+        world_points = world_points[valid_mask]
+
+        if len(world_points) == 0:
+            return None
+
+        # Project to 2D
+        points_2d_homogeneous = (camera_matrix @ points_camera[:, :3].T).T
+        points_2d = points_2d_homogeneous[:, :2] / points_2d_homogeneous[:, 2:3]
+
+        # Filter points within image bounds
+        in_image_mask = (
+            (points_2d[:, 0] >= 0)
+            & (points_2d[:, 0] < image_width)
+            & (points_2d[:, 1] >= 0)
+            & (points_2d[:, 1] < image_height)
+        )
+        points_2d = points_2d[in_image_mask]
+        world_points = world_points[in_image_mask]
+
+        if len(world_points) == 0:
+            return None
+
+        # Extract bbox from Detection2D
+        x_min, y_min, x_max, y_max = det.bbox
+
+        # Find points within this detection box (with small margin)
+        margin = 5  # pixels
+        in_box_mask = (
+            (points_2d[:, 
0] >= x_min - margin) + & (points_2d[:, 0] <= x_max + margin) + & (points_2d[:, 1] >= y_min - margin) + & (points_2d[:, 1] <= y_max + margin) + ) + + detection_points = world_points[in_box_mask] + + if detection_points.shape[0] == 0: + # print(f"No points found in detection bbox after projection. {det.name}") + return None + + # Create initial pointcloud for this detection + initial_pc = PointCloud2.from_numpy( + detection_points, + frame_id=world_pointcloud.frame_id, + timestamp=world_pointcloud.ts, + ) + + # Apply filters - each filter needs all 4 arguments + detection_pc = initial_pc + for filter_func in filters: + result = filter_func(det, detection_pc, camera_info, world_to_optical_transform) + if result is None: + return None + detection_pc = result + + # Final check for empty pointcloud + if len(detection_pc.pointcloud.points) == 0: + return None + + # Create Detection3D with filtered pointcloud + return Detection3D( + image=det.image, + bbox=det.bbox, + track_id=det.track_id, + class_id=det.class_id, + confidence=det.confidence, + name=det.name, + ts=det.ts, + pointcloud=detection_pc, + transform=world_to_optical_transform, + frame_id=world_pointcloud.frame_id, + ) + + @functools.cached_property + def center(self) -> Vector3: + return Vector3(*self.pointcloud.center) + + @functools.cached_property + def pose(self) -> PoseStamped: + """Convert detection to a PoseStamped using pointcloud center. + + Returns pose in world frame with identity rotation. + The pointcloud is already in world frame. + """ + return PoseStamped( + ts=self.ts, + frame_id=self.frame_id, + position=self.center, + orientation=(0.0, 0.0, 0.0, 1.0), # Identity quaternion + ) + + def get_bounding_box(self): + """Get axis-aligned bounding box of the detection's pointcloud.""" + return self.pointcloud.get_axis_aligned_bounding_box() + + def get_oriented_bounding_box(self): + """Get oriented bounding box of the detection's pointcloud.""" + return self.pointcloud.get_oriented_bounding_box() + + def get_bounding_box_dimensions(self) -> tuple[float, float, float]: + """Get dimensions (width, height, depth) of the detection's bounding box.""" + return self.pointcloud.get_bounding_box_dimensions() + + def bounding_box_intersects(self, other: "Detection3D") -> bool: + """Check if this detection's bounding box intersects with another's.""" + return self.pointcloud.bounding_box_intersects(other.pointcloud) + + def to_repr_dict(self) -> Dict[str, Any]: + # Calculate distance from camera + # The pointcloud is in world frame, and transform gives camera position in world + center_world = self.center + # Camera position in world frame is the translation part of the transform + camera_pos = self.transform.translation + # Use Vector3 subtraction and magnitude + distance = (center_world - camera_pos).magnitude() + + parent_dict = super().to_repr_dict() + # Remove bbox key if present + parent_dict.pop("bbox", None) + + return { + **parent_dict, + "dist": f"{distance:.2f}m", + "points": str(len(self.pointcloud)), + } + + def to_foxglove_scene_entity(self, entity_id: str = None) -> "SceneEntity": + """Convert detection to a Foxglove SceneEntity with cube primitive and text label. + + Args: + entity_id: Optional custom entity ID. If None, generates one from name and hash. 
+ + Returns: + SceneEntity with cube bounding box and text label + """ + + # Create a cube primitive for the bounding box + cube = CubePrimitive() + + # Get the axis-aligned bounding box + aabb = self.get_bounding_box() + + # Set pose from axis-aligned bounding box + cube.pose = Pose() + cube.pose.position = Point() + # Get center of the axis-aligned bounding box + aabb_center = aabb.get_center() + cube.pose.position.x = aabb_center[0] + cube.pose.position.y = aabb_center[1] + cube.pose.position.z = aabb_center[2] + + # For axis-aligned box, use identity quaternion (no rotation) + cube.pose.orientation = Quaternion() + cube.pose.orientation.x = 0 + cube.pose.orientation.y = 0 + cube.pose.orientation.z = 0 + cube.pose.orientation.w = 1 + + # Set size from axis-aligned bounding box + cube.size = LCMVector3() + aabb_extent = aabb.get_extent() + cube.size.x = aabb_extent[0] # width + cube.size.y = aabb_extent[1] # height + cube.size.z = aabb_extent[2] # depth + + # Set color based on name hash + cube.color = Color.from_string(self.name, alpha=0.2) + + # Create text label + text = TextPrimitive() + text.pose = Pose() + text.pose.position = Point() + text.pose.position.x = aabb_center[0] + text.pose.position.y = aabb_center[1] + text.pose.position.z = aabb_center[2] + aabb_extent[2] / 2 + 0.1 # Above the box + text.pose.orientation = Quaternion() + text.pose.orientation.x = 0 + text.pose.orientation.y = 0 + text.pose.orientation.z = 0 + text.pose.orientation.w = 1 + text.billboard = True + text.font_size = 20.0 + text.scale_invariant = True + text.color = Color() + text.color.r = 1.0 + text.color.g = 1.0 + text.color.b = 1.0 + text.color.a = 1.0 + text.text = self.scene_entity_label() + + # Create scene entity + entity = SceneEntity() + entity.timestamp = to_ros_stamp(self.ts) + entity.frame_id = self.frame_id + entity.id = str(self.track_id) + entity.lifetime = Duration() + entity.lifetime.sec = 0 # Persistent + entity.lifetime.nanosec = 0 + entity.frame_locked = False + + # Initialize all primitive arrays + entity.metadata_length = 0 + entity.metadata = [] + entity.arrows_length = 0 + entity.arrows = [] + entity.cubes_length = 1 + entity.cubes = [cube] + entity.spheres_length = 0 + entity.spheres = [] + entity.cylinders_length = 0 + entity.cylinders = [] + entity.lines_length = 0 + entity.lines = [] + entity.triangles_length = 0 + entity.triangles = [] + entity.texts_length = 1 + entity.texts = [text] + entity.models_length = 0 + entity.models = [] + + return entity + + def scene_entity_label(self) -> str: + return f"{self.track_id}/{self.name} ({self.confidence:.0%})" + + +T = TypeVar("T", bound="Detection2D") + + +class ImageDetections3D(ImageDetections[Detection3D]): + """Specialized class for 3D detections in an image.""" + + def to_foxglove_scene_update(self) -> "SceneUpdate": + """Convert all detections to a Foxglove SceneUpdate message. 
+ + Returns: + SceneUpdate containing SceneEntity objects for all detections + """ + + # Create SceneUpdate message with all detections + scene_update = SceneUpdate() + scene_update.deletions_length = 0 + scene_update.deletions = [] + scene_update.entities = [] + + # Process each detection + for i, detection in enumerate(self.detections): + entity = detection.to_foxglove_scene_entity(entity_id=f"detection_{detection.name}_{i}") + scene_update.entities.append(entity) + + scene_update.entities_length = len(scene_update.entities) + return scene_update diff --git a/dimos/perception/detection2d/type/imageDetections.py b/dimos/perception/detection2d/type/imageDetections.py new file mode 100644 index 0000000000..edd8449f06 --- /dev/null +++ b/dimos/perception/detection2d/type/imageDetections.py @@ -0,0 +1,157 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import hashlib +from typing import TYPE_CHECKING, Any, Dict, Generic, List, Optional, Tuple, TypeVar + +from rich.console import Console +from rich.table import Table +from rich.text import Text + +from dimos.msgs.foxglove_msgs import ImageAnnotations +from dimos.msgs.sensor_msgs import Image +from dimos.msgs.std_msgs import Header +from dimos.msgs.vision_msgs import Detection2DArray +from dimos.types.timestamped import to_timestamp + +if TYPE_CHECKING: + from dimos.perception.detection2d.type.detection2d import Detection2D + +T = TypeVar("T", bound="Detection2D") + + +def _hash_to_color(name: str) -> str: + """Generate a consistent color for a given name using hash.""" + # List of rich colors to choose from + colors = [ + "cyan", + "magenta", + "yellow", + "blue", + "green", + "red", + "bright_cyan", + "bright_magenta", + "bright_yellow", + "bright_blue", + "bright_green", + "bright_red", + "purple", + "white", + "pink", + ] + + # Hash the name and pick a color + hash_value = hashlib.md5(name.encode()).digest()[0] + return colors[hash_value % len(colors)] + + +class TableStr: + def __str__(self): + console = Console(force_terminal=True, legacy_windows=False) + + # Create a table for detections + table = Table( + title=f"{self.__class__.__name__} [{len(self.detections)} detections @ {to_timestamp(self.image.ts):.3f}]", + show_header=True, + show_edge=True, + ) + + # Dynamically build columns based on the first detection's dict keys + if not self.detections: + return ( + f" {self.__class__.__name__} [0 detections @ {to_timestamp(self.image.ts):.3f}]" + ) + + # Cache all repr_dicts to avoid double computation + detection_dicts = [det.to_repr_dict() for det in self] + + first_dict = detection_dicts[0] + table.add_column("#", style="dim") + for col in first_dict.keys(): + color = _hash_to_color(col) + table.add_column(col.title(), style=color) + + # Add each detection to the table + for i, d in enumerate(detection_dicts): + row = [str(i)] + + for key in first_dict.keys(): + if key == "conf": + # Color-code confidence + conf_color = ( + "green" + if float(d[key]) > 0.8 + else 
"yellow" + if float(d[key]) > 0.5 + else "red" + ) + row.append(Text(f"{d[key]}", style=conf_color)) + elif key == "points" and d.get(key) == "None": + row.append(Text(d.get(key, ""), style="dim")) + else: + row.append(str(d.get(key, ""))) + table.add_row(*row) + + with console.capture() as capture: + console.print(table) + return capture.get().strip() + + +class ImageDetections(Generic[T], TableStr): + image: Image + detections: List[T] + + @property + def ts(self) -> float: + return self.image.ts + + def __init__(self, image: Image, detections: Optional[List[T]] = None): + self.image = image + self.detections = detections or [] + for det in self.detections: + if not det.ts: + det.ts = image.ts + + def __len__(self): + return len(self.detections) + + def __iter__(self): + return iter(self.detections) + + def __getitem__(self, index): + return self.detections[index] + + def to_ros_detection2d_array(self) -> Detection2DArray: + return Detection2DArray( + detections_length=len(self.detections), + header=Header(self.image.ts, "camera_optical"), + detections=[det.to_ros_detection2d() for det in self.detections], + ) + + def to_foxglove_annotations(self) -> ImageAnnotations: + def flatten(xss): + return [x for xs in xss for x in xs] + + texts = flatten(det.to_text_annotation() for det in self.detections) + points = flatten(det.to_points_annotation() for det in self.detections) + + return ImageAnnotations( + texts=texts, + texts_length=len(texts), + points=points, + points_length=len(points), + ) diff --git a/dimos/perception/detection2d/type/test_detection2d.py b/dimos/perception/detection2d/type/test_detection2d.py new file mode 100644 index 0000000000..515bdee339 --- /dev/null +++ b/dimos/perception/detection2d/type/test_detection2d.py @@ -0,0 +1,91 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest + + +def test_detection_basic_properties(detection2d): + """Test basic detection properties.""" + assert detection2d.track_id >= 0 + assert detection2d.class_id >= 0 + assert 0.0 <= detection2d.confidence <= 1.0 + assert detection2d.name is not None + assert detection2d.ts > 0 + + +def test_bounding_box_format(detection2d): + """Test bounding box format and validity.""" + bbox = detection2d.bbox + assert len(bbox) == 4, "Bounding box should have 4 values" + + x1, y1, x2, y2 = bbox + assert x2 > x1, "x2 should be greater than x1" + assert y2 > y1, "y2 should be greater than y1" + assert x1 >= 0, "x1 should be non-negative" + assert y1 >= 0, "y1 should be non-negative" + + +def test_bbox_2d_volume(detection2d): + """Test bounding box volume calculation.""" + volume = detection2d.bbox_2d_volume() + assert volume > 0, "Bounding box volume should be positive" + + # Calculate expected volume + x1, y1, x2, y2 = detection2d.bbox + expected_volume = (x2 - x1) * (y2 - y1) + assert volume == pytest.approx(expected_volume, abs=0.001) + + +def test_bbox_center_calculation(detection2d): + """Test bounding box center calculation.""" + center_bbox = detection2d.get_bbox_center() + assert len(center_bbox) == 4, "Center bbox should have 4 values" + + center_x, center_y, width, height = center_bbox + x1, y1, x2, y2 = detection2d.bbox + + # Verify center calculations + assert center_x == pytest.approx((x1 + x2) / 2.0, abs=0.001) + assert center_y == pytest.approx((y1 + y2) / 2.0, abs=0.001) + assert width == pytest.approx(x2 - x1, abs=0.001) + assert height == pytest.approx(y2 - y1, abs=0.001) + + +def test_cropped_image(detection2d): + """Test cropped image generation.""" + padding = 20 + cropped = detection2d.cropped_image(padding=padding) + + assert cropped is not None, "Cropped image should not be None" + + # The actual cropped image is (260, 192, 3) + assert cropped.width == 192 + assert cropped.height == 260 + assert cropped.shape == (260, 192, 3) + + +def test_to_ros_bbox(detection2d): + """Test ROS bounding box conversion.""" + ros_bbox = detection2d.to_ros_bbox() + + assert ros_bbox is not None + assert hasattr(ros_bbox, "center") + assert hasattr(ros_bbox, "size_x") + assert hasattr(ros_bbox, "size_y") + + # Verify values match + center_x, center_y, width, height = detection2d.get_bbox_center() + assert ros_bbox.center.position.x == pytest.approx(center_x, abs=0.001) + assert ros_bbox.center.position.y == pytest.approx(center_y, abs=0.001) + assert ros_bbox.size_x == pytest.approx(width, abs=0.001) + assert ros_bbox.size_y == pytest.approx(height, abs=0.001) diff --git a/dimos/perception/detection2d/type/test_detection3d.py b/dimos/perception/detection2d/type/test_detection3d.py new file mode 100644 index 0000000000..c8215b9601 --- /dev/null +++ b/dimos/perception/detection2d/type/test_detection3d.py @@ -0,0 +1,141 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
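For concreteness, plugging the suitcase bounding box used by these fixtures into the helpers tested above gives (values rounded):

x1, y1, x2, y2 = 503.44, 249.89, 655.95, 469.83    # suitcase bbox from the fixtures
width, height = x2 - x1, y2 - y1                   # ~152.51 x ~219.93 pixels
area = width * height                              # bbox_2d_volume() ~= 33_543 px^2
center_x, center_y = (x1 + x2) / 2, (y1 + y2) / 2  # first two values of get_bbox_center() ~= (579.70, 359.86)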
+ +import numpy as np +import pytest + + +def test_oriented_bounding_box(detection3d): + """Test oriented bounding box calculation and values.""" + obb = detection3d.get_oriented_bounding_box() + assert obb is not None, "Oriented bounding box should not be None" + + # Verify OBB center values + assert obb.center[0] == pytest.approx(-3.36002, abs=0.1) + assert obb.center[1] == pytest.approx(-0.196446, abs=0.1) + assert obb.center[2] == pytest.approx(0.220184, abs=0.1) + + # Verify OBB extent values + assert obb.extent[0] == pytest.approx(0.531275, abs=0.1) + assert obb.extent[1] == pytest.approx(0.461054, abs=0.1) + assert obb.extent[2] == pytest.approx(0.155, abs=0.1) + + +def test_bounding_box_dimensions(detection3d): + """Test bounding box dimension calculation.""" + dims = detection3d.get_bounding_box_dimensions() + assert len(dims) == 3, "Bounding box dimensions should have 3 values" + assert dims[0] == pytest.approx(0.350, abs=0.1) + assert dims[1] == pytest.approx(0.250, abs=0.1) + assert dims[2] == pytest.approx(0.550, abs=0.1) + + +def test_axis_aligned_bounding_box(detection3d): + """Test axis-aligned bounding box calculation.""" + aabb = detection3d.get_bounding_box() + assert aabb is not None, "Axis-aligned bounding box should not be None" + + # Verify AABB min values + assert aabb.min_bound[0] == pytest.approx(-3.575, abs=0.1) + assert aabb.min_bound[1] == pytest.approx(-0.375, abs=0.1) + assert aabb.min_bound[2] == pytest.approx(-0.075, abs=0.1) + + # Verify AABB max values + assert aabb.max_bound[0] == pytest.approx(-3.075, abs=0.1) + assert aabb.max_bound[1] == pytest.approx(-0.125, abs=0.1) + assert aabb.max_bound[2] == pytest.approx(0.475, abs=0.1) + + +def test_point_cloud_properties(detection3d): + """Test point cloud data and boundaries.""" + pc_points = detection3d.pointcloud.points() + assert len(pc_points) in [69, 70] + assert detection3d.pointcloud.frame_id == "world", ( + f"Expected frame_id 'world', got '{detection3d.pointcloud.frame_id}'" + ) + + # Extract xyz coordinates from points + points = np.array([[pt[0], pt[1], pt[2]] for pt in pc_points]) + + min_pt = np.min(points, axis=0) + max_pt = np.max(points, axis=0) + center = np.mean(points, axis=0) + + # Verify point cloud boundaries + assert min_pt[0] == pytest.approx(-3.575, abs=0.1) + assert min_pt[1] == pytest.approx(-0.375, abs=0.1) + assert min_pt[2] == pytest.approx(-0.075, abs=0.1) + + assert max_pt[0] == pytest.approx(-3.075, abs=0.1) + assert max_pt[1] == pytest.approx(-0.125, abs=0.1) + assert max_pt[2] == pytest.approx(0.475, abs=0.1) + + assert center[0] == pytest.approx(-3.326, abs=0.1) + assert center[1] == pytest.approx(-0.202, abs=0.1) + assert center[2] == pytest.approx(0.160, abs=0.1) + + +def test_foxglove_scene_entity_generation(detection3d): + """Test Foxglove scene entity creation and structure.""" + entity = detection3d.to_foxglove_scene_entity("test_entity_123") + + # Verify entity metadata + assert entity.id == "1", f"Expected entity ID '1', got '{entity.id}'" + assert entity.frame_id == "world", f"Expected frame_id 'world', got '{entity.frame_id}'" + assert entity.cubes_length == 1, f"Expected 1 cube, got {entity.cubes_length}" + assert entity.texts_length == 1, f"Expected 1 text, got {entity.texts_length}" + + +def test_foxglove_cube_properties(detection3d): + """Test Foxglove cube primitive properties.""" + entity = detection3d.to_foxglove_scene_entity("test_entity_123") + cube = entity.cubes[0] + + # Verify position + assert cube.pose.position.x == pytest.approx(-3.325, abs=0.1) + 
assert cube.pose.position.y == pytest.approx(-0.250, abs=0.1) + assert cube.pose.position.z == pytest.approx(0.200, abs=0.1) + + # Verify size + assert cube.size.x == pytest.approx(0.350, abs=0.1) + assert cube.size.y == pytest.approx(0.250, abs=0.1) + assert cube.size.z == pytest.approx(0.550, abs=0.1) + + # Verify color (green with alpha) + assert cube.color.r == pytest.approx(0.08235294117647059, abs=0.1) + assert cube.color.g == pytest.approx(0.7176470588235294, abs=0.1) + assert cube.color.b == pytest.approx(0.28627450980392155, abs=0.1) + assert cube.color.a == pytest.approx(0.2, abs=0.1) + + +def test_foxglove_text_label(detection3d): + """Test Foxglove text label properties.""" + entity = detection3d.to_foxglove_scene_entity("test_entity_123") + text = entity.texts[0] + + assert text.text == "1/suitcase (81%)", f"Expected text '1/suitcase (81%)', got '{text.text}'" + assert text.pose.position.x == pytest.approx(-3.325, abs=0.1) + assert text.pose.position.y == pytest.approx(-0.250, abs=0.1) + assert text.pose.position.z == pytest.approx(0.575, abs=0.1) + assert text.font_size == 20.0, f"Expected font size 20.0, got {text.font_size}" + + +def test_detection_pose(detection3d): + """Test detection pose and frame information.""" + assert detection3d.pose.x == pytest.approx(-3.327, abs=0.1) + assert detection3d.pose.y == pytest.approx(-0.202, abs=0.1) + assert detection3d.pose.z == pytest.approx(0.160, abs=0.1) + assert detection3d.pose.frame_id == "world", ( + f"Expected frame_id 'world', got '{detection3d.pose.frame_id}'" + ) diff --git a/dimos/perception/detection2d/type/test_object3d.py b/dimos/perception/detection2d/type/test_object3d.py new file mode 100644 index 0000000000..4fac1dcf10 --- /dev/null +++ b/dimos/perception/detection2d/type/test_object3d.py @@ -0,0 +1,188 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
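The label asserted above falls straight out of Detection3D.scene_entity_label() for the suitcase fixture:

track_id, name, confidence = 1, "suitcase", 0.8145349025726318
label = f"{track_id}/{name} ({confidence:.0%})"    # -> "1/suitcase (81%)"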
+ +import pytest + +from dimos.perception.detection2d.module2D import Detection2DModule +from dimos.perception.detection2d.module3D import Detection3DModule +from dimos.perception.detection2d.moduleDB import Object3D, ObjectDBModule +from dimos.perception.detection2d.type.detection3d import ImageDetections3D +from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule + + +def test_object_db_module_populated(object_db_module): + """Test that ObjectDBModule is properly populated.""" + assert len(object_db_module.objects) > 0, "Database should contain objects" + assert object_db_module.cnt > 0, "Object counter should be greater than 0" + + +def test_object_db_module_objects_structure(all_objects): + """Test the structure of objects in the database.""" + for obj in all_objects: + assert isinstance(obj, Object3D) + assert hasattr(obj, "track_id") + assert hasattr(obj, "detections") + assert hasattr(obj, "best_detection") + assert hasattr(obj, "center") + assert len(obj.detections) >= 1 + + +def test_object3d_properties(first_object): + """Test basic properties of an Object3D.""" + assert first_object.track_id is not None + assert isinstance(first_object.track_id, str) + assert first_object.name is not None + assert first_object.class_id >= 0 + assert 0.0 <= first_object.confidence <= 1.0 + assert first_object.ts > 0 + assert first_object.frame_id is not None + assert first_object.best_detection is not None + + +def test_object3d_multiple_detections(all_objects): + """Test objects that have been built from multiple detections.""" + # Find objects with multiple detections + multi_detection_objects = [obj for obj in all_objects if len(obj.detections) > 1] + + if multi_detection_objects: + obj = multi_detection_objects[0] + + # Test that confidence is the max of all detections + max_conf = max(d.confidence for d in obj.detections) + assert obj.confidence == max_conf + + # Test that timestamp is the max (most recent) + max_ts = max(d.ts for d in obj.detections) + assert obj.ts == max_ts + + # Test that best_detection has the largest bbox volume + best_volume = obj.best_detection.bbox_2d_volume() + for det in obj.detections: + assert det.bbox_2d_volume() <= best_volume + + +def test_object3d_center(first_object): + """Test Object3D center calculation.""" + assert first_object.center is not None + assert hasattr(first_object.center, "x") + assert hasattr(first_object.center, "y") + assert hasattr(first_object.center, "z") + + # Center should be within reasonable bounds + assert -10 < first_object.center.x < 10 + assert -10 < first_object.center.y < 10 + assert -10 < first_object.center.z < 10 + + +def test_object3d_repr_dict(first_object): + """Test to_repr_dict method.""" + repr_dict = first_object.to_repr_dict() + + assert "object_id" in repr_dict + assert "detections" in repr_dict + assert "center" in repr_dict + + assert repr_dict["object_id"] == first_object.track_id + assert repr_dict["detections"] == len(first_object.detections) + + # Center should be formatted as string with coordinates + assert isinstance(repr_dict["center"], str) + assert repr_dict["center"].startswith("[") + assert repr_dict["center"].endswith("]") + + +def test_object3d_scene_entity_label(first_object): + """Test scene entity label generation.""" + label = first_object.scene_entity_label() + + assert isinstance(label, str) + assert first_object.name in label + assert f"({len(first_object.detections)})" in label + + +def test_object3d_agent_encode(first_object): + """Test agent encoding.""" + encoded 
= first_object.agent_encode() + + assert isinstance(encoded, dict) + assert "id" in encoded + assert "name" in encoded + assert "detections" in encoded + assert "last_seen" in encoded + + assert encoded["id"] == first_object.track_id + assert encoded["name"] == first_object.name + assert encoded["detections"] == len(first_object.detections) + assert encoded["last_seen"].endswith("s ago") + + +def test_object3d_image_property(first_object): + """Test image property returns best_detection's image.""" + assert first_object.image is not None + assert first_object.image is first_object.best_detection.image + + +def test_object3d_addition(object_db_module): + """Test Object3D addition operator.""" + # Get existing objects from the database + objects = list(object_db_module.objects.values()) + if len(objects) < 2: + pytest.skip("Not enough objects in database") + + # Get detections from two different objects + det1 = objects[0].best_detection + det2 = objects[1].best_detection + + # Create a new object with the first detection + obj = Object3D("test_track_combined", det1) + + # Add the second detection from a different object + combined = obj + det2 + + assert combined.track_id == "test_track_combined" + assert len(combined.detections) == 2 + + # The combined object should have properties from both detections + assert det1 in combined.detections + assert det2 in combined.detections + + # Best detection should be determined by the Object3D logic + assert combined.best_detection is not None + + # Center should be valid (no specific value check since we're using real detections) + assert hasattr(combined, "center") + assert combined.center is not None + + +def test_image_detections3d_scene_update(object_db_module): + """Test ImageDetections3D to Foxglove scene update conversion.""" + # Get some detections + objects = list(object_db_module.objects.values()) + if not objects: + pytest.skip("No objects in database") + + detections = [obj.best_detection for obj in objects[:3]] # Take up to 3 + + image_detections = ImageDetections3D(image=detections[0].image, detections=detections) + + scene_update = image_detections.to_foxglove_scene_update() + + assert scene_update is not None + assert scene_update.entities_length == len(detections) + + for i, entity in enumerate(scene_update.entities): + assert entity.id == str(detections[i].track_id) + assert entity.frame_id == detections[i].frame_id + assert entity.cubes_length == 1 + assert entity.texts_length == 1 diff --git a/dimos/perception/detection2d/yolo_2d_det.py b/dimos/perception/detection2d/yolo_2d_det.py index bc8b0bc577..02c4ee5325 100644 --- a/dimos/perception/detection2d/yolo_2d_det.py +++ b/dimos/perception/detection2d/yolo_2d_det.py @@ -41,7 +41,7 @@ def __init__(self, model_path="models_yolo", model_name="yolo11n.onnx", device=" device (str): Device to run inference on ('cuda' or 'cpu') """ self.device = device - self.model = YOLO(get_data(model_path) / model_name) + self.model = YOLO(get_data(model_path) / model_name, task="detect") module_dir = os.path.dirname(__file__) self.tracker_config = os.path.join(module_dir, "config", "custom_tracker.yaml") diff --git a/dimos/protocol/tf/tf.py b/dimos/protocol/tf/tf.py index 9d1db5ed16..0052ef4758 100644 --- a/dimos/protocol/tf/tf.py +++ b/dimos/protocol/tf/tf.py @@ -281,7 +281,6 @@ def __init__(self, **kwargs) -> None: TFSpec.__init__(self, **kwargs) MultiTBuffer.__init__(self, self.config.buffer_size) - # Check if pubsub is a class (callable) or an instance pubsub_config = getattr(self.config, 
"pubsub", None) if pubsub_config is not None: if callable(pubsub_config): @@ -291,7 +290,7 @@ def __init__(self, **kwargs) -> None: else: raise ValueError("PubSub configuration is missing") - if getattr(self.config, "autostart", True): + if self.config.autostart: self.start() def start(self, sub=True) -> None: @@ -317,6 +316,18 @@ def publish(self, *args: Transform) -> None: def publish_static(self, *args: Transform) -> None: raise NotImplementedError("Static transforms not implemented in PubSubTF.") + def publish_all(self) -> None: + """Publish all transforms currently stored in all buffers.""" + all_transforms = [] + for buffer in self.buffers.values(): + # Get the latest transform from each buffer + latest = buffer.get() # get() with no args returns latest + if latest: + all_transforms.append(latest) + + if all_transforms: + self.publish(*all_transforms) + def get( self, parent_frame: str, diff --git a/dimos/robot/nav_bot.py b/dimos/robot/nav_bot.py new file mode 100644 index 0000000000..c8359c9a1c --- /dev/null +++ b/dimos/robot/nav_bot.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +NavBot class for navigation-related functionality. +Encapsulates ROS bridge and topic remapping for Unitree robots. +""" + +import logging +import time + +from dimos import core +from dimos.core import Module, In, Out, rpc +from dimos.msgs.geometry_msgs import PoseStamped, TwistStamped, Transform, Vector3 +from dimos.msgs.nav_msgs import Odometry +from dimos.msgs.sensor_msgs import PointCloud2, Joy, Image +from dimos.msgs.std_msgs import Bool +from dimos.msgs.tf2_msgs.TFMessage import TFMessage +from dimos.protocol.tf import TF +from dimos.robot.ros_bridge import ROSBridge, BridgeDirection +from dimos.utils.transform_utils import euler_to_quaternion +from geometry_msgs.msg import TwistStamped as ROSTwistStamped +from geometry_msgs.msg import PoseStamped as ROSPoseStamped +from nav_msgs.msg import Odometry as ROSOdometry +from sensor_msgs.msg import PointCloud2 as ROSPointCloud2, Joy as ROSJoy, Image as ROSImage +from std_msgs.msg import Bool as ROSBool +from tf2_msgs.msg import TFMessage as ROSTFMessage +from dimos.utils.logging_config import setup_logger +from dimos.protocol.pubsub.lcmpubsub import LCM, Topic + +logger = setup_logger("dimos.robot.unitree_webrtc.nav_bot", level=logging.INFO) + +############################################################ +# Navigation Module + +# first run unitree_g1.py to start the ROS bridge and webrtc connection and teleop +# python dimos/robot/unitree_webrtc/unitree_g1.py + + +# then deploy this module in any other run file. 
+############################################################ +class NavigationModule(Module): + goal_reached: In[Bool] = None + + goal_pose: Out[PoseStamped] = None + cancel_goal: Out[Bool] = None + joy: Out[Joy] = None + + def __init__(self, *args, **kwargs): + """Initialize NavigationModule.""" + Module.__init__(self, *args, **kwargs) + self.goal_reach = None + + @rpc + def start(self): + """Start the navigation module.""" + if self.goal_reached: + self.goal_reached.subscribe(self._on_goal_reached) + logger.info("NavigationModule started") + + def _on_goal_reached(self, msg: Bool): + """Handle goal reached status messages.""" + self.goal_reach = msg.data + + def _set_autonomy_mode(self): + """ + Set autonomy mode by publishing a Joy message. + """ + + joy_msg = Joy( + frame_id="dimos", + axes=[ + 0.0, # axis 0 + 0.0, # axis 1 + -1.0, # axis 2 + 0.0, # axis 3 + 1.0, # axis 4 + 1.0, # axis 5 + 0.0, # axis 6 + 0.0, # axis 7 + ], + buttons=[ + 0, # button 0 + 0, # button 1 + 0, # button 2 + 0, # button 3 + 0, # button 4 + 0, # button 5 + 0, # button 6 + 1, # button 7 - controls autonomy mode + 0, # button 8 + 0, # button 9 + 0, # button 10 + ], + ) + + if self.joy: + self.joy.publish(joy_msg) + logger.info("Setting autonomy mode via Joy message") + + @rpc + def go_to(self, pose: PoseStamped, timeout: float = 60.0) -> bool: + """ + Navigate to a target pose by publishing to LCM topics. + + Args: + pose: Target pose to navigate to + timeout: Maximum time to wait for goal (seconds) + + Returns: + True if the goal was reached within the timeout, False otherwise + """ + logger.info( + f"Navigating to goal: ({pose.position.x:.2f}, {pose.position.y:.2f}, {pose.position.z:.2f})" + ) + + self.goal_reach = None + self._set_autonomy_mode() + self.goal_pose.publish(pose) + + start_time = time.time() + while time.time() - start_time < timeout: + if self.goal_reach is not None: + return self.goal_reach + time.sleep(0.1) + + self.stop() + + logger.warning(f"Navigation timed out after {timeout} seconds") + return False + + @rpc + def stop(self) -> bool: + """ + Cancel current navigation by publishing to cancel_goal.
+ + Returns: + True if cancel command was sent successfully + """ + logger.info("Cancelling navigation") + + if self.cancel_goal: + cancel_msg = Bool(data=True) + self.cancel_goal.publish(cancel_msg) + return True + + return False + + +class TopicRemapModule(Module): + """Module that remaps Odometry to PoseStamped and publishes static transforms.""" + + odom: In[Odometry] = None + odom_pose: Out[PoseStamped] = None + + def __init__(self, sensor_to_base_link_transform=None, *args, **kwargs): + Module.__init__(self, *args, **kwargs) + self.tf = TF() + self.sensor_to_base_link_transform = sensor_to_base_link_transform or [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ] + + @rpc + def start(self): + self.odom.subscribe(self._publish_odom_pose) + logger.info("TopicRemapModule started") + + def _publish_odom_pose(self, msg: Odometry): + pose_msg = PoseStamped( + ts=msg.ts, + frame_id=msg.frame_id, + position=msg.pose.pose.position, + orientation=msg.pose.pose.orientation, + ) + self.odom_pose.publish(pose_msg) + + # Publish static transform from sensor to base_link + translation = Vector3( + self.sensor_to_base_link_transform[0], + self.sensor_to_base_link_transform[1], + self.sensor_to_base_link_transform[2], + ) + euler_angles = Vector3( + self.sensor_to_base_link_transform[3], + self.sensor_to_base_link_transform[4], + self.sensor_to_base_link_transform[5], + ) + rotation = euler_to_quaternion(euler_angles) + + sensor_to_base_link_tf = Transform( + translation=translation, + rotation=rotation, + frame_id="sensor", + child_frame_id="base_link", + ts=msg.ts, + ) + + # map to world static transform + map_to_world_tf = Transform( + translation=Vector3(0.0, 0.0, 0.0), + rotation=euler_to_quaternion(Vector3(0.0, 0.0, 0.0)), + frame_id="map", + child_frame_id="world", + ts=msg.ts, + ) + + self.tf.publish(sensor_to_base_link_tf, map_to_world_tf) + + +class NavBot: + """ + NavBot class for navigation-related functionality. + Manages ROS bridge and topic remapping for navigation. + """ + + def __init__(self, dimos=None, sensor_to_base_link_transform=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0]): + """ + Initialize NavBot. 
+ + Args: + dimos: DIMOS instance (creates new one if None) + sensor_to_base_link_transform: Optional [x, y, z, roll, pitch, yaw] transform from sensor to base_link + """ + if dimos is None: + self.dimos = core.start(2) + else: + self.dimos = dimos + + self.sensor_to_base_link_transform = sensor_to_base_link_transform + self.ros_bridge = None + self.topic_remap_module = None + self.tf = TF() + self.lcm = LCM() + + def start(self): + if self.topic_remap_module: + self.topic_remap_module.start() + logger.info("Topic remap module started") + + if self.ros_bridge: + logger.info("ROS bridge started") + + def deploy_navigation_modules(self, bridge_name="nav_bot_ros_bridge"): + # Deploy topic remap module + logger.info("Deploying topic remap module...") + self.topic_remap_module = self.dimos.deploy( + TopicRemapModule, sensor_to_base_link_transform=self.sensor_to_base_link_transform + ) + self.topic_remap_module.odom.transport = core.LCMTransport("/odom", Odometry) + self.topic_remap_module.odom_pose.transport = core.LCMTransport("/odom_pose", PoseStamped) + + # Deploy ROS bridge + logger.info("Deploying ROS bridge...") + self.ros_bridge = ROSBridge(bridge_name) + + # Configure ROS topics + self.ros_bridge.add_topic( + "/cmd_vel", TwistStamped, ROSTwistStamped, direction=BridgeDirection.ROS_TO_DIMOS + ) + self.ros_bridge.add_topic( + "/state_estimation", + Odometry, + ROSOdometry, + direction=BridgeDirection.ROS_TO_DIMOS, + remap_topic="/odom", + ) + self.ros_bridge.add_topic( + "/tf", TFMessage, ROSTFMessage, direction=BridgeDirection.ROS_TO_DIMOS + ) + self.ros_bridge.add_topic( + "/registered_scan", PointCloud2, ROSPointCloud2, direction=BridgeDirection.ROS_TO_DIMOS + ) + self.ros_bridge.add_topic("/joy", Joy, ROSJoy, direction=BridgeDirection.DIMOS_TO_ROS) + # Navigation control topics from autonomy stack + self.ros_bridge.add_topic( + "/goal_pose", PoseStamped, ROSPoseStamped, direction=BridgeDirection.DIMOS_TO_ROS + ) + self.ros_bridge.add_topic( + "/cancel_goal", Bool, ROSBool, direction=BridgeDirection.DIMOS_TO_ROS + ) + self.ros_bridge.add_topic( + "/goal_reached", Bool, ROSBool, direction=BridgeDirection.ROS_TO_DIMOS + ) + + # self.ros_bridge.add_topic( + # "/camera/image", Image, ROSImage, direction=BridgeDirection.ROS_TO_DIMOS + # ) + + def _set_autonomy_mode(self): + """ + Set autonomy mode by publishing Joy message. + """ + + joy_msg = Joy( + frame_id="dimos", + axes=[ + 0.0, # axis 0 + 0.0, # axis 1 + -1.0, # axis 2 + 0.0, # axis 3 + 1.0, # axis 4 + 1.0, # axis 5 + 0.0, # axis 6 + 0.0, # axis 7 + ], + buttons=[ + 0, # button 0 + 0, # button 1 + 0, # button 2 + 0, # button 3 + 0, # button 4 + 0, # button 5 + 0, # button 6 + 1, # button 7 - controls autonomy mode + 0, # button 8 + 0, # button 9 + 0, # button 10 + ], + ) + + self.lcm.publish(Topic("/joy", Joy), joy_msg) + + def navigate_to_goal(self, pose: PoseStamped, blocking: bool = True, timeout: float = 30.0): + """Navigate to a target pose using ROS topics. + + Args: + pose: Target pose to navigate to + blocking: If True, block until goal is reached. If False, return immediately. 
+ timeout: Maximum time to wait for goal to be reached (seconds) + + Returns: + If blocking=True: True if navigation was successful, False otherwise + If blocking=False: True if goal was sent successfully + """ + logger.info( + f"Navigating to goal: ({pose.position.x:.2f}, {pose.position.y:.2f}, {pose.position.z:.2f})" + ) + + # Publish goal to /goal_pose topic + self._set_autonomy_mode() + goal_topic = Topic("/goal_pose", PoseStamped) + self.lcm.publish(goal_topic, pose) + + if not blocking: + return True + + # Wait for goal_reached signal + goal_reached_topic = Topic("/goal_reached", Bool) + start_time = time.time() + + while time.time() - start_time < timeout: + try: + msg = self.lcm.wait_for_message(goal_reached_topic, timeout=0.5) + if msg and msg.data: + logger.info("Navigation goal reached") + return True + elif msg and not msg.data: + logger.info("Navigation was cancelled or failed") + return False + except Exception: + # Timeout on wait_for_message, continue looping + pass + + logger.warning(f"Navigation timed out after {timeout} seconds") + return False + + def cancel_navigation(self) -> bool: + """Cancel the current navigation goal using ROS topics. + + Returns: + True if cancel command was sent successfully + """ + logger.info("Cancelling navigation goal") + + # Publish cancel command to /cancel_goal topic + cancel_topic = Topic("/cancel_goal", Bool) + cancel_msg = Bool(data=True) + self.lcm.publish(cancel_topic, cancel_msg) + + return True + + def shutdown(self): + logger.info("Shutting down navigation modules...") + + if self.ros_bridge is not None: + try: + self.ros_bridge.shutdown() + logger.info("ROS bridge shut down successfully") + except Exception as e: + logger.error(f"Error shutting down ROS bridge: {e}") diff --git a/dimos/robot/unitree_webrtc/modular/connection_module.py b/dimos/robot/unitree_webrtc/modular/connection_module.py index c6214c4f2c..6e13ed938e 100644 --- a/dimos/robot/unitree_webrtc/modular/connection_module.py +++ b/dimos/robot/unitree_webrtc/modular/connection_module.py @@ -18,6 +18,7 @@ import functools import logging import os +import queue import time import warnings from dataclasses import dataclass @@ -28,7 +29,8 @@ from reactivex import operators as ops from reactivex.observable import Observable -from dimos.core import In, LCMTransport, Module, ModuleConfig, Out, rpc, DimosCluster +from dimos.agents2 import Agent, Output, Reducer, Stream, skill +from dimos.core import DimosCluster, In, LCMTransport, Module, ModuleConfig, Out, rpc from dimos.msgs.foxglove_msgs import ImageAnnotations from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Twist, Vector3 from dimos.msgs.sensor_msgs.Image import Image, sharpness_window @@ -103,9 +105,7 @@ def odom_stream(self): @functools.cache def video_stream(self): print("video stream start") - video_store = TimedSensorReplay( - f"{self.dir_name}/video", - ) + video_store = TimedSensorReplay(f"{self.dir_name}/video") return video_store.stream(**self.replay_config) @@ -136,11 +136,27 @@ class ConnectionModule(Module): default_config = ConnectionModuleConfig + # mega temporary, skill should have a limit decorator for number of + # parallel calls + video_running: bool = False + def __init__(self, connection_type: str = "webrtc", *args, **kwargs): self.connection_config = kwargs self.connection_type = connection_type Module.__init__(self, *args, **kwargs) + @skill(stream=Stream.passive, output=Output.image, reducer=Reducer.latest) + def video_stream_tool(self) -> Image: + """implicit video stream 
skill, don't call this directly""" + if self.video_running: + return "video stream already running" + self.video_running = True + _queue = queue.Queue(maxsize=1) + self.connection.video_stream().subscribe(_queue.put) + + for image in iter(_queue.get, None): + yield image + @rpc def record(self, recording_name: str): lidar_store: TimedSensorStorage = TimedSensorStorage(f"{recording_name}/lidar") @@ -183,10 +199,8 @@ def resize(image: Image) -> Image: int(originalwidth / image_resize_factor), int(originalheight / image_resize_factor) ) - sharpness = sharpness_window(10, self.connection.video_stream()) - sharpness.subscribe(self.video.publish) - # self.connection.video_stream().subscribe(self.video.publish) - + self.connection.video_stream().subscribe(self.video.publish) + # sharpness_window(15.0, self.connection.video_stream()).subscribe(self.video.publish) # self.connection.video_stream().pipe(ops.map(resize)).subscribe(self.video.publish) self.camera_info_stream().subscribe(self.camera_info.publish) self.movecmd.subscribe(self.connection.move) @@ -278,8 +292,8 @@ def camera_info_stream(self) -> Observable[CameraInfo]: def deploy_connection(dimos: DimosCluster, **kwargs): - # foxglove_bridge = dimos.deploy(FoxgloveBridge) - # foxglove_bridge.start() + foxglove_bridge = dimos.deploy(FoxgloveBridge) + foxglove_bridge.start() connection = dimos.deploy( ConnectionModule, diff --git a/dimos/robot/unitree_webrtc/modular/ivan_unitree.py b/dimos/robot/unitree_webrtc/modular/ivan_unitree.py index 5893248530..73927cf248 100644 --- a/dimos/robot/unitree_webrtc/modular/ivan_unitree.py +++ b/dimos/robot/unitree_webrtc/modular/ivan_unitree.py @@ -16,7 +16,9 @@ import time from dimos_lcm.sensor_msgs import CameraInfo +from lcm_msgs.foxglove_msgs import SceneUpdate +from dimos.agents2.spec import Model, Provider from dimos.core import LCMTransport, start # from dimos.msgs.detection2d import Detection2DArray @@ -24,6 +26,7 @@ from dimos.msgs.sensor_msgs import Image, PointCloud2 from dimos.msgs.vision_msgs import Detection2DArray from dimos.perception.detection2d import Detection3DModule +from dimos.perception.detection2d.moduleDB import ObjectDBModule from dimos.protocol.pubsub import lcm from dimos.robot.unitree_webrtc.modular import deploy_connection, deploy_navigation from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule @@ -34,15 +37,22 @@ def detection_unitree(): dimos = start(6) - connection = deploy_connection(dimos) - connection.start() - # connection.record("unitree_go2_office_walk2") # mapper = deploy_navigation(dimos, connection) + # mapper.start() + + def goto(pose): + print("NAVIGATION REQUESTED:", pose) + return True - module3D = dimos.deploy(Detection3DModule, camera_info=ConnectionModule._camera_info()) + module3D = dimos.deploy( + ObjectDBModule, + goto=goto, + camera_info=ConnectionModule._camera_info(), + ) module3D.image.connect(connection.video) + # module3D.pointcloud.connect(mapper.global_map) module3D.pointcloud.connect(connection.lidar) module3D.annotations.transport = LCMTransport("/annotations", ImageAnnotations) @@ -55,16 +65,37 @@ def detection_unitree(): module3D.detected_image_0.transport = LCMTransport("/detected/image/0", Image) module3D.detected_image_1.transport = LCMTransport("/detected/image/1", Image) module3D.detected_image_2.transport = LCMTransport("/detected/image/2", Image) + + module3D.scene_update.transport = LCMTransport("/scene_update", SceneUpdate) + module3D.start() - # detection.start() + connection.start() + + from dimos.agents2 
import Agent, Output, Reducer, Stream, skill + from dimos.agents2.cli.human import HumanInput + + agent = Agent( + system_prompt="You are a helpful assistant for controlling a Unitree Go2 robot. ", + model=Model.GPT_4O, # Could add CLAUDE models to enum + provider=Provider.OPENAI, # Would need ANTHROPIC provider + ) + + human_input = dimos.deploy(HumanInput) + agent.register_skills(human_input) + # agent.register_skills(connection) + agent.register_skills(module3D) + + # agent.run_implicit_skill("video_stream_tool") + agent.run_implicit_skill("human") + + agent.start() + agent.loop_thread() try: while True: time.sleep(1) except KeyboardInterrupt: connection.stop() - # mapper.stop() - # detection.stop() logger.info("Shutting down...") diff --git a/dimos/robot/unitree_webrtc/modular/navigation.py b/dimos/robot/unitree_webrtc/modular/navigation.py index 8ceaf0e195..c37cac700a 100644 --- a/dimos/robot/unitree_webrtc/modular/navigation.py +++ b/dimos/robot/unitree_webrtc/modular/navigation.py @@ -27,7 +27,7 @@ def deploy_navigation(dimos, connection): - mapper = dimos.deploy(Map, voxel_size=0.5, cost_resolution=0.05, global_publish_interval=1.0) + mapper = dimos.deploy(Map, voxel_size=0.5, cost_resolution=0.05, global_publish_interval=0.5) mapper.lidar.connect(connection.lidar) mapper.global_map.transport = LCMTransport("/global_map", LidarMessage) mapper.global_costmap.transport = LCMTransport("/global_costmap", OccupancyGrid) diff --git a/dimos/robot/unitree_webrtc/rosnav.py b/dimos/robot/unitree_webrtc/rosnav.py new file mode 100644 index 0000000000..969ddad950 --- /dev/null +++ b/dimos/robot/unitree_webrtc/rosnav.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import time + +from dimos import core +from dimos.core import Module, In, Out, rpc +from dimos.msgs.geometry_msgs import PoseStamped, TwistStamped, Transform, Vector3 +from dimos.msgs.nav_msgs import Odometry +from dimos.msgs.sensor_msgs import PointCloud2, Joy +from dimos.msgs.std_msgs.Bool import Bool +from dimos.msgs.std_msgs.Header import Header +from dimos.msgs.tf2_msgs.TFMessage import TFMessage +from dimos.protocol.tf import TF +from dimos.robot.ros_bridge import ROSBridge, BridgeDirection +from dimos.utils.transform_utils import euler_to_quaternion +from geometry_msgs.msg import TwistStamped as ROSTwistStamped +from geometry_msgs.msg import PoseStamped as ROSPoseStamped +from nav_msgs.msg import Odometry as ROSOdometry +from sensor_msgs.msg import PointCloud2 as ROSPointCloud2 +from std_msgs.msg import Bool as ROSBool +from tf2_msgs.msg import TFMessage as ROSTFMessage +from dimos.utils.logging_config import setup_logger +from dimos.protocol.pubsub.lcmpubsub import LCM, Topic + +logger = setup_logger("dimos.robot.unitree_webrtc.nav_bot", level=logging.INFO) + + +class NavigationModule(Module): + goal_pose: Out[PoseStamped] = None + goal_reached: In[Bool] = None + cancel_goal: Out[Bool] = None + joy: Out[Joy] = None + + def __init__(self, *args, **kwargs): + """Initialize NavigationModule.""" + Module.__init__(self, *args, **kwargs) + self.goal_reach = None + + @rpc + def start(self): + """Start the navigation module.""" + if self.goal_reached: + self.goal_reached.subscribe(self._on_goal_reached) + logger.info("NavigationModule started") + + def _on_goal_reached(self, msg: Bool): + """Handle goal reached status messages.""" + self.goal_reach = msg.data + + def _set_autonomy_mode(self): + """ + Set autonomy mode by publishing Joy message. + """ + + joy_msg = Joy( + frame_id="dimos", + axes=[ + 0.0, # axis 0 + 0.0, # axis 1 + -1.0, # axis 2 + 0.0, # axis 3 + 1.0, # axis 4 + 1.0, # axis 5 + 0.0, # axis 6 + 0.0, # axis 7 + ], + buttons=[ + 0, # button 0 + 0, # button 1 + 0, # button 2 + 0, # button 3 + 0, # button 4 + 0, # button 5 + 0, # button 6 + 1, # button 7 - controls autonomy mode + 0, # button 8 + 0, # button 9 + 0, # button 10 + ], + ) + + if self.joy: + self.joy.publish(joy_msg) + logger.info(f"Setting autonomy mode via Joy message") + + @rpc + def go_to(self, pose: PoseStamped, timeout: float = 60.0) -> bool: + """ + Navigate to a target pose by publishing to LCM topics. + + Args: + pose: Target pose to navigate to + blocking: If True, block until goal is reached + timeout: Maximum time to wait for goal (seconds) + + Returns: + True if navigation was successful (or started if non-blocking) + """ + logger.info( + f"Navigating to goal: ({pose.position.x:.2f}, {pose.position.y:.2f}, {pose.position.z:.2f})" + ) + + self.goal_reach = None + self._set_autonomy_mode() + self.goal_pose.publish(pose) + time.sleep(0.2) + self.goal_pose.publish(pose) + + start_time = time.time() + while time.time() - start_time < timeout: + if self.goal_reach is not None: + return self.goal_reach + time.sleep(0.1) + + self.stop() + + logger.warning(f"Navigation timed out after {timeout} seconds") + return False + + @rpc + def stop(self) -> bool: + """ + Cancel current navigation by publishing to cancel_goal. 
+ + Returns: + True if cancel command was sent successfully + """ + logger.info("Cancelling navigation") + + if self.cancel_goal: + cancel_msg = Bool(data=True) + self.cancel_goal.publish(cancel_msg) + return True + + return False diff --git a/dimos/robot/unitree_webrtc/unitree_b1/test_connection.py b/dimos/robot/unitree_webrtc/unitree_b1/test_connection.py index c54f124f02..e595a1adde 100644 --- a/dimos/robot/unitree_webrtc/unitree_b1/test_connection.py +++ b/dimos/robot/unitree_webrtc/unitree_b1/test_connection.py @@ -271,7 +271,7 @@ def test_watchdog_timing_accuracy(self): # Check timing (should be close to 200ms + up to 50ms watchdog interval) elapsed = timeout_time - start_time print(f"\nWatchdog timeout occurred at exactly {elapsed:.3f} seconds") - assert 0.19 <= elapsed <= 0.26, f"Watchdog timed out at {elapsed:.3f}s, expected ~0.2-0.25s" + assert 0.19 <= elapsed <= 0.3, f"Watchdog timed out at {elapsed:.3f}s, expected ~0.2-0.25s" conn.running = False conn.watchdog_running = False diff --git a/dimos/robot/unitree_webrtc/unitree_g1.py b/dimos/robot/unitree_webrtc/unitree_g1.py index 0011eac760..08a23bc2dc 100644 --- a/dimos/robot/unitree_webrtc/unitree_g1.py +++ b/dimos/robot/unitree_webrtc/unitree_g1.py @@ -18,43 +18,62 @@ Minimal implementation using WebRTC connection for robot control. """ +import logging import os import time -import logging from typing import Optional +from dimos_lcm.foxglove_msgs import SceneUpdate +from geometry_msgs.msg import PoseStamped as ROSPoseStamped +from geometry_msgs.msg import TwistStamped as ROSTwistStamped +from nav_msgs.msg import Odometry as ROSOdometry +from sensor_msgs.msg import Image as ROSImage +from sensor_msgs.msg import Joy as ROSJoy +from sensor_msgs.msg import PointCloud2 as ROSPointCloud2 +from tf2_msgs.msg import TFMessage as ROSTFMessage + from dimos import core -from dimos.constants import DEFAULT_CAPACITY_COLOR_IMAGE, DEFAULT_CAPACITY_DEPTH_IMAGE -from dimos.core import Module, In, Out, rpc -from dimos.msgs.geometry_msgs import PoseStamped, Twist, TwistStamped +from dimos.agents2 import Agent +from dimos.agents2.cli.human import HumanInput +from dimos.agents2.skills.ros_navigation import RosNavigation +from dimos.agents2.spec import Model, Provider +from dimos.core import In, Module, Out, rpc +from dimos.hardware.camera import zed +from dimos.hardware.camera.module import CameraModule +from dimos.hardware.camera.webcam import Webcam +from dimos.msgs.foxglove_msgs import ImageAnnotations +from dimos.msgs.geometry_msgs import ( + PoseStamped, + Quaternion, + Transform, + Twist, + TwistStamped, + Vector3, +) from dimos.msgs.nav_msgs.Odometry import Odometry -from dimos.msgs.sensor_msgs import CameraInfo, PointCloud2 +from dimos.msgs.sensor_msgs import CameraInfo, Image, Joy, PointCloud2 +from dimos.msgs.std_msgs.Bool import Bool from dimos.msgs.tf2_msgs.TFMessage import TFMessage +from dimos.msgs.vision_msgs import Detection2DArray +from dimos.perception.detection2d import Detection3DModule +from dimos.perception.detection2d.moduleDB import ObjectDBModule +from dimos.perception.spatial_perception import SpatialMemory from dimos.protocol import pubsub from dimos.protocol.pubsub.lcmpubsub import LCM from dimos.robot.foxglove_bridge import FoxgloveBridge -from dimos.web.websocket_vis.websocket_vis_module import WebsocketVisModule +from dimos.robot.robot import Robot +from dimos.robot.ros_bridge import BridgeDirection, ROSBridge from dimos.robot.unitree_webrtc.connection import UnitreeWebRTCConnection +from 
dimos.robot.unitree_webrtc.rosnav import NavigationModule +from dimos.robot.unitree_webrtc.unitree_g1_skill_container import UnitreeG1SkillContainer from dimos.robot.unitree_webrtc.unitree_skills import MyUnitreeSkills -from dimos.robot.ros_bridge import ROSBridge, BridgeDirection -from geometry_msgs.msg import TwistStamped as ROSTwistStamped -from nav_msgs.msg import Odometry as ROSOdometry -from sensor_msgs.msg import PointCloud2 as ROSPointCloud2 -from tf2_msgs.msg import TFMessage as ROSTFMessage from dimos.skills.skills import SkillLibrary -from dimos.robot.robot import Robot - from dimos.types.robot_capabilities import RobotCapability from dimos.utils.logging_config import setup_logger +from dimos.web.websocket_vis.websocket_vis_module import WebsocketVisModule logger = setup_logger("dimos.robot.unitree_webrtc.unitree_g1", level=logging.INFO) -try: - from dimos.hardware.camera.zed import ZEDModule -except ImportError: - logger.warning("ZEDModule not found. Please install pyzed to use ZED camera functionality.") - ZEDModule = None - # Suppress verbose loggers logging.getLogger("aiortc.codecs.h264").setLevel(logging.ERROR) logging.getLogger("lcm_foxglove_bridge").setLevel(logging.ERROR) @@ -123,6 +142,7 @@ def __init__( enable_joystick: bool = False, enable_connection: bool = True, enable_ros_bridge: bool = True, + enable_perception: bool = False, enable_camera: bool = False, ): """Initialize the G1 robot. @@ -137,7 +157,7 @@ def __init__( enable_joystick: Enable pygame joystick control enable_connection: Enable robot connection module enable_ros_bridge: Enable ROS bridge - enable_camera: Enable ZED camera module + enable_camera: Enable web camera module """ super().__init__() self.ip = ip @@ -147,6 +167,7 @@ def __init__( self.enable_joystick = enable_joystick self.enable_connection = enable_connection self.enable_ros_bridge = enable_ros_bridge + self.enable_perception = enable_perception self.enable_camera = enable_camera self.websocket_port = websocket_port self.lcm = LCM() @@ -166,28 +187,71 @@ def __init__( self.connection = None self.websocket_vis = None self.foxglove_bridge = None + self.spatial_memory_module = None self.joystick = None self.ros_bridge = None - self.zed_camera = None - + self.camera = None self._setup_directories() def _setup_directories(self): - """Setup output directories.""" + """Setup directories for spatial memory storage.""" os.makedirs(self.output_dir, exist_ok=True) logger.info(f"Robot outputs will be saved to: {self.output_dir}") + # Initialize memory directories + self.memory_dir = os.path.join(self.output_dir, "memory") + os.makedirs(self.memory_dir, exist_ok=True) + + # Initialize spatial memory properties + self.spatial_memory_dir = os.path.join(self.memory_dir, "spatial_memory") + self.spatial_memory_collection = "spatial_memory" + self.db_path = os.path.join(self.spatial_memory_dir, "chromadb_data") + self.visual_memory_path = os.path.join(self.spatial_memory_dir, "visual_memory.pkl") + + # Create spatial memory directories + os.makedirs(self.spatial_memory_dir, exist_ok=True) + os.makedirs(self.db_path, exist_ok=True) + + def _deploy_detection(self, goto): + detection = self.dimos.deploy( + ObjectDBModule, goto=goto, camera_info=zed.CameraInfo.SingleWebcam + ) + + detection.image.connect(self.camera.image) + detection.pointcloud.transport = core.LCMTransport("/map", PointCloud2) + + detection.annotations.transport = core.LCMTransport("/annotations", ImageAnnotations) + detection.detections.transport = core.LCMTransport("/detections", 
Detection2DArray) + + detection.scene_update.transport = core.LCMTransport("/scene_update", SceneUpdate) + detection.target.transport = core.LCMTransport("/target", PoseStamped) + detection.detected_pointcloud_0.transport = core.LCMTransport( + "/detected/pointcloud/0", PointCloud2 + ) + detection.detected_pointcloud_1.transport = core.LCMTransport( + "/detected/pointcloud/1", PointCloud2 + ) + detection.detected_pointcloud_2.transport = core.LCMTransport( + "/detected/pointcloud/2", PointCloud2 + ) + + detection.detected_image_0.transport = core.LCMTransport("/detected/image/0", Image) + detection.detected_image_1.transport = core.LCMTransport("/detected/image/1", Image) + detection.detected_image_2.transport = core.LCMTransport("/detected/image/2", Image) + + self.detection = detection + def start(self): """Start the robot system with all modules.""" - self.dimos = core.start(4) # 2 workers for connection and visualization + self.dimos = core.start(8) # 2 workers for connection and visualization if self.enable_connection: self._deploy_connection() self._deploy_visualization() - if self.enable_camera: - self._deploy_camera() + if self.enable_perception: + self._deploy_perception() if self.enable_joystick: self._deploy_joystick() @@ -195,12 +259,65 @@ def start(self): if self.enable_ros_bridge: self._deploy_ros_bridge() - self._start_modules() + self.nav = self.dimos.deploy(NavigationModule) + self.nav.goal_reached.transport = core.LCMTransport("/goal_reached", Bool) + self.nav.goal_pose.transport = core.LCMTransport("/goal_pose", PoseStamped) + self.nav.cancel_goal.transport = core.LCMTransport("/cancel_goal", Bool) + self.nav.joy.transport = core.LCMTransport("/joy", Joy) + self.nav.start() + + self._deploy_camera() + self._deploy_detection(self.nav.go_to) self.lcm.start() + # Setup agent with G1 skills + logger.info("Setting up agent with G1 skills...") + + agent = Agent( + system_prompt="You are a helpful assistant controlling a Unitree G1 humanoid robot. 
You can control the robot's arms, movement modes, and navigation.", + model=Model.GPT_4O, + provider=Provider.OPENAI, + ) + + # Register G1-specific skill container + g1_skills = UnitreeG1SkillContainer(robot=self) + agent.register_skills(g1_skills) + + human_input = self.dimos.deploy(HumanInput) + agent.register_skills(human_input) + + if self.enable_perception: + agent.register_skills(self.detection) + + # Register ROS navigation + ros_nav = RosNavigation(self) + ros_nav.__enter__() + agent.register_skills(ros_nav) + + agent.run_implicit_skill("human") + agent.start() + + # For logging + skills = [tool.name for tool in agent.get_tools()] + logger.info(f"Agent configured with {len(skills)} skills: {', '.join(skills)}") + + agent.loop_thread() + logger.info("UnitreeG1 initialized and started") logger.info(f"WebSocket visualization available at http://localhost:{self.websocket_port}") + self._start_modules() + + def stop(self): + self.lcm.stop() + + def __enter__(self) -> "UnitreeG1": + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + return False def _deploy_connection(self): """Deploy and configure the connection module.""" @@ -212,46 +329,28 @@ def _deploy_connection(self): self.connection.odom_pose.transport = core.LCMTransport("/odom", PoseStamped) def _deploy_camera(self): - """Deploy and configure the ZED camera module (real or fake based on replay_path).""" - - if self.replay_path: - # Use FakeZEDModule for replay - from dimos.hardware.fake_zed_module import FakeZEDModule - - logger.info(f"Deploying FakeZEDModule for replay from: {self.replay_path}") - self.zed_camera = self.dimos.deploy( - FakeZEDModule, - recording_path=self.replay_path, - frame_id="zed_camera", - ) - else: - # Use real ZEDModule (with optional recording) - logger.info("Deploying ZED camera module...") - self.zed_camera = self.dimos.deploy( - ZEDModule, - camera_id=0, - resolution="HD720", - depth_mode="NEURAL", - fps=30, - enable_tracking=True, # Enable for G1 pose estimation - enable_imu_fusion=True, - set_floor_as_origin=True, - publish_rate=30.0, - frame_id="zed_camera", - recording_path=self.recording_path, # Pass recording path if provided - ) - - # Configure ZED LCM transports (same for both real and fake) - self.zed_camera.color_image.transport = core.pSHMTransport( - "/zed/color_image", default_capacity=DEFAULT_CAPACITY_COLOR_IMAGE - ) - self.zed_camera.depth_image.transport = core.pSHMTransport( - "/zed/depth_image", default_capacity=DEFAULT_CAPACITY_DEPTH_IMAGE + """Deploy and configure a standard webcam module.""" + logger.info("Deploying standard webcam module...") + + self.camera = self.dimos.deploy( + CameraModule, + transform=Transform( + translation=Vector3(0.05, 0.0, 0.0), + rotation=Quaternion.from_euler(Vector3(0.0, 0.2, 0.0)), + frame_id="sensor", + child_frame_id="camera_link", + ), + hardware=lambda: Webcam( + camera_index=0, + frequency=15, + stereo_slice="left", + camera_info=zed.CameraInfo.SingleWebcam, + ), ) - self.zed_camera.camera_info.transport = core.LCMTransport("/zed/camera_info", CameraInfo) - self.zed_camera.pose.transport = core.LCMTransport("/zed/pose", PoseStamped) - logger.info("ZED camera module configured") + self.camera.image.transport = core.LCMTransport("/image", Image) + self.camera.camera_info.transport = core.LCMTransport("/camera_info", CameraInfo) + logger.info("Webcam module configured") def _deploy_visualization(self): """Deploy and configure visualization modules.""" @@ -269,6 +368,20 @@ def 
_deploy_visualization(self): ] ) + def _deploy_perception(self): + self.spatial_memory_module = self.dimos.deploy( + SpatialMemory, + collection_name=self.spatial_memory_collection, + db_path=self.db_path, + visual_memory_path=self.visual_memory_path, + output_dir=self.spatial_memory_dir, + ) + + self.spatial_memory_module.video.transport = core.LCMTransport("/image", Image) + self.spatial_memory_module.odom.transport = core.LCMTransport("/odom", PoseStamped) + + logger.info("Spatial memory module deployed and connected") + def _deploy_joystick(self): """Deploy joystick control module.""" from dimos.robot.unitree_webrtc.g1_joystick_module import G1JoystickModule @@ -297,9 +410,30 @@ def _deploy_ros_bridge(self): "/tf", TFMessage, ROSTFMessage, direction=BridgeDirection.ROS_TO_DIMOS ) - # Add /registered_scan topic from ROS to DIMOS + from geometry_msgs.msg import PoseStamped as ROSPoseStamped + from std_msgs.msg import Bool as ROSBool + + from dimos.msgs.std_msgs import Bool + + # Navigation control topics from autonomy stack + self.ros_bridge.add_topic( + "/goal_pose", PoseStamped, ROSPoseStamped, direction=BridgeDirection.DIMOS_TO_ROS + ) + self.ros_bridge.add_topic( + "/cancel_goal", Bool, ROSBool, direction=BridgeDirection.DIMOS_TO_ROS + ) + self.ros_bridge.add_topic( + "/goal_reached", Bool, ROSBool, direction=BridgeDirection.ROS_TO_DIMOS + ) + + self.ros_bridge.add_topic("/joy", Joy, ROSJoy, direction=BridgeDirection.DIMOS_TO_ROS) + self.ros_bridge.add_topic( - "/registered_scan", PointCloud2, ROSPointCloud2, direction=BridgeDirection.ROS_TO_DIMOS + "/registered_scan", + PointCloud2, + ROSPointCloud2, + direction=BridgeDirection.ROS_TO_DIMOS, + remap_topic="/map", ) logger.info( @@ -310,11 +444,17 @@ def _start_modules(self): """Start all deployed modules.""" if self.connection: self.connection.start() - self.websocket_vis.start() + # self.websocket_vis.start() self.foxglove_bridge.start() - if self.joystick: - self.joystick.start() + # if self.joystick: + # self.joystick.start() + + self.camera.start() + self.detection.start() + + if self.enable_perception: + self.spatial_memory_module.start() # Initialize skills after connection is established if self.skill_library is not None: @@ -356,11 +496,16 @@ def shutdown(self): logger.info("UnitreeG1 shutdown complete") + @property + def spatial_memory(self) -> Optional[SpatialMemory]: + return self.spatial_memory_module + def main(): """Main entry point for testing.""" - import os import argparse + import os + from dotenv import load_dotenv load_dotenv() @@ -368,7 +513,7 @@ def main(): parser = argparse.ArgumentParser(description="Unitree G1 Humanoid Robot Control") parser.add_argument("--ip", default=os.getenv("ROBOT_IP"), help="Robot IP address") parser.add_argument("--joystick", action="store_true", help="Enable pygame joystick control") - parser.add_argument("--camera", action="store_true", help="Enable ZED camera module") + parser.add_argument("--camera", action="store_true", help="Enable usb camera module") parser.add_argument("--output-dir", help="Output directory for logs/data") parser.add_argument("--record", help="Path to save recording") parser.add_argument("--replay", help="Path to replay recording from") @@ -386,9 +531,23 @@ def main(): enable_camera=args.camera, enable_connection=os.getenv("ROBOT_IP") is not None, enable_ros_bridge=True, + enable_perception=True, ) robot.start() + # time.sleep(7) + # print("Starting navigation...") + # print( + # robot.nav.go_to( + # PoseStamped( + # ts=time.time(), + # frame_id="map", + # 
position=Vector3(0.0, 0.0, 0.03), + # orientation=Quaternion(0, 0, 0, 0), + # ), + # timeout=10, + # ), + # ) try: if args.joystick: print("\n" + "=" * 50) diff --git a/dimos/robot/unitree_webrtc/unitree_g1_skill_container.py b/dimos/robot/unitree_webrtc/unitree_g1_skill_container.py new file mode 100644 index 0000000000..ff1bcfad1f --- /dev/null +++ b/dimos/robot/unitree_webrtc/unitree_g1_skill_container.py @@ -0,0 +1,225 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Unitree G1 skill container for the new agents2 framework. +Dynamically generates skills for G1 humanoid robot including arm controls and movement modes. +""" + +from __future__ import annotations + +import datetime +import time +from typing import TYPE_CHECKING, Optional, Union + +from dimos.msgs.geometry_msgs import Twist, TwistStamped, Vector3 +from dimos.protocol.skill.skill import skill +from dimos.protocol.skill.type import Reducer, Stream +from dimos.robot.unitree_webrtc.unitree_skill_container import UnitreeSkillContainer +from dimos.utils.logging_config import setup_logger + +if TYPE_CHECKING: + from dimos.robot.unitree_webrtc.unitree_g1 import UnitreeG1 + from dimos.robot.unitree_webrtc.unitree_go2 import UnitreeGo2 + +logger = setup_logger("dimos.robot.unitree_webrtc.unitree_g1_skill_container") + +# G1 Arm Actions - all use api_id 7106 on topic "rt/api/arm/request" +G1_ARM_CONTROLS = [ + ("Handshake", 27, "Perform a handshake gesture with the right hand."), + ("HighFive", 18, "Give a high five with the right hand."), + ("Hug", 19, "Perform a hugging gesture with both arms."), + ("HighWave", 26, "Wave with the hand raised high."), + ("Clap", 17, "Clap hands together."), + ("FaceWave", 25, "Wave near the face level."), + ("LeftKiss", 12, "Blow a kiss with the left hand."), + ("ArmHeart", 20, "Make a heart shape with both arms overhead."), + ("RightHeart", 21, "Make a heart gesture with the right hand."), + ("HandsUp", 15, "Raise both hands up in the air."), + ("XRay", 24, "Hold arms in an X-ray pose position."), + ("RightHandUp", 23, "Raise only the right hand up."), + ("Reject", 22, "Make a rejection or 'no' gesture."), + ("CancelAction", 99, "Cancel any current arm action and return hands to neutral position."), +] + +# G1 Movement Modes - all use api_id 7101 on topic "rt/api/sport/request" +G1_MODE_CONTROLS = [ + ("WalkMode", 500, "Switch to normal walking mode."), + ("WalkControlWaist", 501, "Switch to walking mode with waist control."), + ("RunMode", 801, "Switch to running mode."), +] + + +class UnitreeG1SkillContainer(UnitreeSkillContainer): + """Container for Unitree G1 humanoid robot skills. + + Inherits all Go2 skills and adds G1-specific arm controls and movement modes. + """ + + def __init__(self, robot: Optional[Union[UnitreeG1, UnitreeGo2]] = None): + """Initialize the skill container with robot reference. 
+ + Args: + robot: The UnitreeG1 or UnitreeGo2 robot instance + """ + # Initialize parent class to get all base Unitree skills + super().__init__(robot) + + # Add G1-specific skills on top + self._generate_arm_skills() + self._generate_mode_skills() + + def _generate_arm_skills(self): + """Dynamically generate arm control skills from G1_ARM_CONTROLS list.""" + logger.info(f"Generating {len(G1_ARM_CONTROLS)} G1 arm control skills") + + for name, data_value, description in G1_ARM_CONTROLS: + skill_name = self._convert_to_snake_case(name) + self._create_arm_skill(skill_name, data_value, description, name) + + def _generate_mode_skills(self): + """Dynamically generate movement mode skills from G1_MODE_CONTROLS list.""" + logger.info(f"Generating {len(G1_MODE_CONTROLS)} G1 movement mode skills") + + for name, data_value, description in G1_MODE_CONTROLS: + skill_name = self._convert_to_snake_case(name) + self._create_mode_skill(skill_name, data_value, description, name) + + def _create_arm_skill( + self, skill_name: str, data_value: int, description: str, original_name: str + ): + """Create a dynamic arm control skill method with the @skill decorator. + + Args: + skill_name: Snake_case name for the method + data_value: The arm action data value + description: Human-readable description + original_name: Original CamelCase name for display + """ + + def dynamic_skill_func(self) -> str: + """Dynamic arm skill function.""" + return self._execute_arm_command(data_value, original_name) + + # Set the function's metadata + dynamic_skill_func.__name__ = skill_name + dynamic_skill_func.__doc__ = description + + # Apply the @skill decorator + decorated_skill = skill()(dynamic_skill_func) + + # Bind the method to the instance + bound_method = decorated_skill.__get__(self, self.__class__) + + # Add it as an attribute + setattr(self, skill_name, bound_method) + + logger.debug(f"Generated arm skill: {skill_name} (data={data_value})") + + def _create_mode_skill( + self, skill_name: str, data_value: int, description: str, original_name: str + ): + """Create a dynamic movement mode skill method with the @skill decorator. + + Args: + skill_name: Snake_case name for the method + data_value: The mode data value + description: Human-readable description + original_name: Original CamelCase name for display + """ + + def dynamic_skill_func(self) -> str: + """Dynamic mode skill function.""" + return self._execute_mode_command(data_value, original_name) + + # Set the function's metadata + dynamic_skill_func.__name__ = skill_name + dynamic_skill_func.__doc__ = description + + # Apply the @skill decorator + decorated_skill = skill()(dynamic_skill_func) + + # Bind the method to the instance + bound_method = decorated_skill.__get__(self, self.__class__) + + # Add it as an attribute + setattr(self, skill_name, bound_method) + + logger.debug(f"Generated mode skill: {skill_name} (data={data_value})") + + # ========== Override Skills for G1 ========== + + @skill() + def move(self, x: float, y: float = 0.0, yaw: float = 0.0, duration: float = 0.0) -> str: + """Move the robot using direct velocity commands (G1 version with TwistStamped). 
+ + Args: + x: Forward velocity (m/s) + y: Left/right velocity (m/s) + yaw: Rotational velocity (rad/s) + duration: How long to move (seconds) + """ + if self._robot is None: + return "Error: Robot not connected" + + # G1 uses TwistStamped instead of Twist + twist_stamped = TwistStamped(linear=Vector3(x, y, 0), angular=Vector3(0, 0, yaw)) + self._robot.move(twist_stamped, duration=duration) + return f"Started moving with velocity=({x}, {y}, {yaw}) for {duration} seconds" + + # ========== Helper Methods ========== + + def _execute_arm_command(self, data_value: int, name: str) -> str: + """Execute an arm command through WebRTC interface. + + Args: + data_value: The arm action data value + name: Human-readable name of the command + """ + if self._robot is None: + return f"Error: Robot not connected (cannot execute {name})" + + try: + result = self._robot.connection.publish_request( + "rt/api/arm/request", {"api_id": 7106, "parameter": {"data": data_value}} + ) + message = f"G1 arm action {name} executed successfully (data={data_value})" + logger.info(message) + return message + except Exception as e: + error_msg = f"Failed to execute G1 arm action {name}: {e}" + logger.error(error_msg) + return error_msg + + def _execute_mode_command(self, data_value: int, name: str) -> str: + """Execute a movement mode command through WebRTC interface. + + Args: + data_value: The mode data value + name: Human-readable name of the command + """ + if self._robot is None: + return f"Error: Robot not connected (cannot execute {name})" + + try: + result = self._robot.connection.publish_request( + "rt/api/sport/request", {"api_id": 7101, "parameter": {"data": data_value}} + ) + message = f"G1 mode {name} activated successfully (data={data_value})" + logger.info(message) + return message + except Exception as e: + error_msg = f"Failed to execute G1 mode {name}: {e}" + logger.error(error_msg) + return error_msg diff --git a/dimos/types/test_timestamped.py b/dimos/types/test_timestamped.py index 44f359dbc7..a4617774ef 100644 --- a/dimos/types/test_timestamped.py +++ b/dimos/types/test_timestamped.py @@ -268,6 +268,7 @@ def test_time_window_collection(): assert window.end_ts == 5.5 +@pytest.mark.tofix def test_timestamp_alignment(): # Create a dedicated scheduler for this test to avoid thread leaks test_scheduler = ThreadPoolScheduler(max_workers=6) diff --git a/dimos/types/test_weaklist.py b/dimos/types/test_weaklist.py new file mode 100644 index 0000000000..fd0b05d74f --- /dev/null +++ b/dimos/types/test_weaklist.py @@ -0,0 +1,163 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for WeakList implementation.""" + +import gc +import pytest +from dimos.types.weaklist import WeakList + + +class TestObject: + """Simple test object.""" + + def __init__(self, value): + self.value = value + + def __repr__(self): + return f"TestObject({self.value})" + + +def test_weaklist_basic_operations(): + """Test basic append, iterate, and length operations.""" + wl = WeakList() + + # Add objects + obj1 = TestObject(1) + obj2 = TestObject(2) + obj3 = TestObject(3) + + wl.append(obj1) + wl.append(obj2) + wl.append(obj3) + + # Check length and iteration + assert len(wl) == 3 + assert list(wl) == [obj1, obj2, obj3] + + # Check contains + assert obj1 in wl + assert obj2 in wl + assert TestObject(4) not in wl + + +def test_weaklist_auto_removal(): + """Test that objects are automatically removed when garbage collected.""" + wl = WeakList() + + obj1 = TestObject(1) + obj2 = TestObject(2) + obj3 = TestObject(3) + + wl.append(obj1) + wl.append(obj2) + wl.append(obj3) + + assert len(wl) == 3 + + # Delete one object and force garbage collection + del obj2 + gc.collect() + + # Should only have 2 objects now + assert len(wl) == 2 + assert list(wl) == [obj1, obj3] + + +def test_weaklist_explicit_remove(): + """Test explicit removal of objects.""" + wl = WeakList() + + obj1 = TestObject(1) + obj2 = TestObject(2) + + wl.append(obj1) + wl.append(obj2) + + # Remove obj1 + wl.remove(obj1) + assert len(wl) == 1 + assert obj1 not in wl + assert obj2 in wl + + # Try to remove non-existent object + with pytest.raises(ValueError): + wl.remove(TestObject(3)) + + +def test_weaklist_indexing(): + """Test index access.""" + wl = WeakList() + + obj1 = TestObject(1) + obj2 = TestObject(2) + obj3 = TestObject(3) + + wl.append(obj1) + wl.append(obj2) + wl.append(obj3) + + assert wl[0] is obj1 + assert wl[1] is obj2 + assert wl[2] is obj3 + + # Test index out of range + with pytest.raises(IndexError): + _ = wl[3] + + +def test_weaklist_clear(): + """Test clearing the list.""" + wl = WeakList() + + obj1 = TestObject(1) + obj2 = TestObject(2) + + wl.append(obj1) + wl.append(obj2) + + assert len(wl) == 2 + + wl.clear() + assert len(wl) == 0 + assert obj1 not in wl + + +def test_weaklist_iteration_during_modification(): + """Test that iteration works even if objects are deleted during iteration.""" + wl = WeakList() + + objects = [TestObject(i) for i in range(5)] + for obj in objects: + wl.append(obj) + + # Verify initial state + assert len(wl) == 5 + + # Iterate and check that we can safely delete objects + seen_values = [] + for obj in wl: + seen_values.append(obj.value) + if obj.value == 2: + # Delete another object (not the current one) + del objects[3] # Delete TestObject(3) + gc.collect() + + # The object with value 3 gets garbage collected during iteration + # so we might not see it (depends on timing) + assert len(seen_values) in [4, 5] + assert all(v in [0, 1, 2, 3, 4] for v in seen_values) + + # After iteration, the list should have 4 objects (one was deleted) + assert len(wl) == 4 diff --git a/dimos/types/weaklist.py b/dimos/types/weaklist.py new file mode 100644 index 0000000000..8722455c66 --- /dev/null +++ b/dimos/types/weaklist.py @@ -0,0 +1,85 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Weak reference list implementation that automatically removes dead references.""" + +import weakref +from typing import Any, Iterator, Optional + + +class WeakList: + """A list that holds weak references to objects. + + Objects are automatically removed when garbage collected. + Supports iteration, append, remove, and length operations. + """ + + def __init__(self): + self._refs = [] + + def append(self, obj: Any) -> None: + """Add an object to the list (stored as weak reference).""" + + def _cleanup(ref): + try: + self._refs.remove(ref) + except ValueError: + pass + + self._refs.append(weakref.ref(obj, _cleanup)) + + def remove(self, obj: Any) -> None: + """Remove an object from the list.""" + for i, ref in enumerate(self._refs): + if ref() is obj: + del self._refs[i] + return + raise ValueError(f"{obj} not in WeakList") + + def discard(self, obj: Any) -> None: + """Remove an object from the list if present, otherwise do nothing.""" + try: + self.remove(obj) + except ValueError: + pass + + def __iter__(self) -> Iterator[Any]: + """Iterate over live objects, skipping dead references.""" + # Create a copy to avoid modification during iteration + for ref in self._refs[:]: + obj = ref() + if obj is not None: + yield obj + + def __len__(self) -> int: + """Return count of live objects.""" + return sum(1 for _ in self) + + def __contains__(self, obj: Any) -> bool: + """Check if object is in the list.""" + return any(ref() is obj for ref in self._refs) + + def clear(self) -> None: + """Remove all references.""" + self._refs.clear() + + def __getitem__(self, index: int) -> Any: + """Get object at index (only counting live objects).""" + for i, obj in enumerate(self): + if i == index: + return obj + raise IndexError("WeakList index out of range") + + def __repr__(self) -> str: + return f"WeakList({list(self)})" diff --git a/dimos/utils/data.py b/dimos/utils/data.py index 62ef6da851..0a2656ca82 100644 --- a/dimos/utils/data.py +++ b/dimos/utils/data.py @@ -12,18 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import glob -import os -import pickle import subprocess import tarfile from functools import cache from pathlib import Path -from typing import Any, Callable, Generic, Iterator, Optional, Type, TypeVar, Union - -from reactivex import from_iterable, interval -from reactivex import operators as ops -from reactivex.observable import Observable +from typing import Optional, Union @cache @@ -38,7 +31,9 @@ def _get_repo_root() -> Path: @cache -def _get_data_dir() -> Path: +def _get_data_dir(extra_path: Optional[str] = None) -> Path: + if extra_path: + return _get_repo_root() / "data" / extra_path return _get_repo_root() / "data" diff --git a/dimos/utils/reactive.py b/dimos/utils/reactive.py index abc8512dfb..8ab8fe66ae 100644 --- a/dimos/utils/reactive.py +++ b/dimos/utils/reactive.py @@ -13,13 +13,13 @@ # limitations under the License. 
import threading -from typing import Optional, TypeVar, Generic, Any, Callable +from typing import Any, Callable, Generic, Optional, TypeVar import reactivex as rx from reactivex import operators as ops -from reactivex.scheduler import ThreadPoolScheduler from reactivex.disposable import Disposable from reactivex.observable import Observable +from reactivex.scheduler import ThreadPoolScheduler from rxpy_backpressure import BackPressure from dimos.utils.threadpool import get_scheduler @@ -197,3 +197,33 @@ def spyfun(x): return x return ops.map(spyfun) + + +def quality_barrier(quality_func: Callable[[T], float], target_frequency: float): + """ + RxPY pipe operator that selects the highest quality item within each time window. + + Args: + quality_func: Function to compute quality score for each item + target_frequency: Output frequency in Hz (e.g., 1.0 for 1 item per second) + + Returns: + A pipe operator that can be used with .pipe() + """ + window_duration = 1.0 / target_frequency # Duration of each window in seconds + + def _quality_barrier(source: Observable[T]) -> Observable[T]: + return source.pipe( + # Create time-based windows + ops.window_with_time(window_duration), + # For each window, find the highest quality item + ops.flat_map( + lambda window: window.pipe( + ops.to_list(), + ops.map(lambda items: max(items, key=quality_func) if items else None), + ops.filter(lambda x: x is not None), + ) + ), + ) + + return _quality_barrier diff --git a/pyproject.toml b/pyproject.toml index 7979293411..670f98a0b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,6 +111,7 @@ dependencies = [ [project.scripts] lcmspy = "dimos.utils.cli.lcmspy.run_lcmspy:main" +lcm-recorder = "dimos.utils.cli.recorder.run_recorder:main" foxglove-bridge = "dimos.utils.cli.foxglove_bridge.run_foxglove_bridge:main" skillspy = "dimos.utils.cli.skillspy.skillspy:main" agentspy = "dimos.utils.cli.agentspy.agentspy:main"
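# For the quality_barrier operator added to dimos/utils/reactive.py above, a brief usage sketch;
# the source stream and the scoring function are illustrative, only quality_barrier comes from this patch:
#
#   import time
#   import reactivex as rx
#   from reactivex import operators as ops
#   from dimos.utils.reactive import quality_barrier
#
#   # Ten items at 4 Hz; the "quality" of an item is simply its numeric value.
#   source = rx.interval(0.25).pipe(ops.take(10))
#
#   # Downsample to ~1 Hz, keeping the highest-scoring item from each 1-second window.
#   source.pipe(quality_barrier(lambda x: float(x), target_frequency=1.0)).subscribe(
#       lambda best: print(f"best of window: {best}")
#   )
#
#   time.sleep(4)  # interval() emits on a background scheduler, so keep the process alive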