Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
b432113
timedsensorreplay seek and loop, navigation init extracted
leshy Aug 28, 2025
67e20ea
sharpness window for image replay
leshy Aug 28, 2025
defd1a6
testing find frame, single frame test
leshy Aug 28, 2025
ef4cf41
transform/pointcloud filter sketch
leshy Aug 28, 2025
8c7949a
working test
leshy Aug 28, 2025
433be3b
pointcloud2 from_numpy
leshy Aug 28, 2025
c7f55dd
detection experiments
leshy Aug 28, 2025
a590623
single frame module test
leshy Sep 14, 2025
311d01d
detection module single frame test
leshy Sep 14, 2025
89264db
transform to matrix, small cleanup
leshy Sep 14, 2025
5cac758
statistical cleanup
leshy Sep 14, 2025
dd29117
general hidden point filtering
leshy Sep 14, 2025
9ed3292
raycast tests
leshy Sep 14, 2025
4220e94
checkpoint
leshy Sep 14, 2025
46de8a1
detection class rewrite
leshy Sep 15, 2025
7d776c8
implementing dedicated Detection2D type
leshy Sep 15, 2025
5a7151e
further work on detection type
leshy Sep 15, 2025
d5916c3
type cleanup
leshy Sep 15, 2025
4beae99
detection2d, detection3d type checkpoint
leshy Sep 15, 2025
da6dac2
temp checkpoint
leshy Sep 15, 2025
3402f88
better replay
leshy Sep 15, 2025
44a562b
Merge branch 'dev' into spatial-object-localization
leshy Sep 16, 2025
9f1f27b
merge fixes
leshy Sep 16, 2025
47c334a
fixed replay
leshy Sep 16, 2025
0316e54
split into module2d and module3d
leshy Sep 16, 2025
d3f2219
better imageannotations
leshy Sep 16, 2025
6af57af
better image->detection association models
leshy Sep 16, 2025
74e5201
major structure cleanup
leshy Sep 16, 2025
f589fc5
full integration of new structure
leshy Sep 16, 2025
9e3ed7b
str repr
leshy Sep 16, 2025
f413e26
center, distance calculations
leshy Sep 16, 2025
3ec45f1
checkpoint
leshy Sep 16, 2025
1e84631
db module, objects carry images
leshy Sep 16, 2025
3f00223
emitting cropped image
leshy Sep 16, 2025
842cb95
timestamps timestamps timestamps
leshy Sep 16, 2025
097d9ad
checkpoint
leshy Sep 16, 2025
5d8529f
odom and lidar/image timestamps are mismatched in go2 client
leshy Sep 17, 2025
bd71879
working detection3d
leshy Sep 18, 2025
b141d6a
Merge branch 'dev' into spatial-object-localization
leshy Sep 18, 2025
856edc1
mock agent test fix
leshy Sep 18, 2025
e0db705
detector tests, stream tests fix
leshy Sep 18, 2025
a1d49cb
type checks passing
leshy Sep 18, 2025
c9db047
module detection tests
leshy Sep 18, 2025
fefd095
removing unused files
leshy Sep 18, 2025
4a98ac8
comment cleanup
leshy Sep 18, 2025
725be30
pr review issues
leshy Sep 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions data/.lfs/unitree_go2_lidar_corrected.tar.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions data/.lfs/unitree_go2_office_walk2.tar.gz
Git LFS file not shown
10 changes: 0 additions & 10 deletions dimos/agents2/test_mock_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
from dimos.msgs.foxglove_msgs import ImageAnnotations
from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Vector3
from dimos.msgs.sensor_msgs import Image
from dimos.msgs.vision_msgs import Detection2DArray
from dimos.perception.detection2d import Detect2DModule
from dimos.protocol.skill.test_coordinator import SkillContainerTest
from dimos.robot.unitree_webrtc.modular.connection_module import ConnectionModule
from dimos.robot.unitree_webrtc.type.lidar import LidarMessage
Expand Down Expand Up @@ -167,15 +165,8 @@ def test_tool_call_implicit_detections():
robot_connection.camera_info.transport = LCMTransport("/camera_info", CameraInfo)
robot_connection.start()

detect2d = dimos.deploy(Detect2DModule)
detect2d.detections.transport = LCMTransport("/detections", Detection2DArray)
detect2d.annotations.transport = LCMTransport("/annotations", ImageAnnotations)
detect2d.image.connect(robot_connection.video)
detect2d.start()

test_skill_module = dimos.deploy(SkillContainerTest)

agent.register_skills(detect2d)
agent.register_skills(test_skill_module)
agent.start()

Expand Down Expand Up @@ -208,5 +199,4 @@ def test_tool_call_implicit_detections():
agent.stop()
test_skill_module.stop()
robot_connection.stop()
detect2d.stop()
dimos.stop()
9 changes: 6 additions & 3 deletions dimos/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

import dimos.core.colors as colors
from dimos.core.core import rpc
from dimos.core.module import Module, ModuleBase
from dimos.core.module import Module, ModuleBase, ModuleConfig
from dimos.core.stream import In, Out, RemoteIn, RemoteOut, Transport
from dimos.core.transport import (
LCMTransport,
SHMTransport,
ZenohTransport,
pLCMTransport,
SHMTransport,
pSHMTransport,
)
from dimos.protocol.rpc.lcmrpc import LCMRPC
Expand Down Expand Up @@ -99,7 +99,10 @@ def rpc_call(*args, **kwargs):
return self.actor_instance.__getattr__(name)


def patchdask(dask_client: Client, local_cluster: LocalCluster) -> Client:
DimosCluster = Client


def patchdask(dask_client: Client, local_cluster: LocalCluster) -> DimosCluster:
def deploy(
actor_class,
*args,
Expand Down
6 changes: 3 additions & 3 deletions dimos/core/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,12 @@ class Transport(ObservableMixin[T]):
# used by local Output
def broadcast(self, selfstream: Out[T], value: T): ...

def publish(self, msg: T):
self.broadcast(None, msg)

# used by local Input
def subscribe(self, selfstream: In[T], callback: Callable[[T], any]) -> None: ...

def publish(self, *args, **kwargs):
return self.broadcast(*args, **kwargs)


class Stream(Generic[T]):
_transport: Optional[Transport]
Expand Down
11 changes: 11 additions & 0 deletions dimos/msgs/foxglove_msgs/ImageAnnotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@


class ImageAnnotations(FoxgloveImageAnnotations):
def __add__(self, other: "ImageAnnotations") -> "ImageAnnotations":
    """Concatenate two annotation sets into a new ImageAnnotations.

    The ``points`` and ``texts`` sequences of both operands are joined and
    the corresponding ``*_length`` counters are recomputed from the merged
    sequences. Neither operand is mutated.

    NOTE(review): only points and texts are merged here — if the base
    Foxglove message carries other annotation kinds (e.g. circles), they
    are dropped by this operator; confirm that is intended.
    """
    merged_points = self.points + other.points
    merged_texts = self.texts + other.texts

    return ImageAnnotations(
        points=merged_points,
        points_length=len(merged_points),
        texts=merged_texts,
        texts_length=len(merged_texts),
    )

def agent_encode(self) -> str:
if len(self.texts) == 0:
return None
Expand Down
31 changes: 31 additions & 0 deletions dimos/msgs/geometry_msgs/Transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,37 @@ def to_pose(self) -> "PoseStamped":
frame_id=self.frame_id,
)

def to_matrix(self) -> "np.ndarray":
    """Convert Transform to a 4x4 transformation matrix.

    Returns a homogeneous transformation matrix that represents both
    the rotation and translation of this transform: the upper-left 3x3
    block is the rotation derived from the quaternion, the rightmost
    column is the translation.

    Returns:
        np.ndarray: A 4x4 homogeneous transformation matrix
    """
    import numpy as np

    # Extract quaternion components
    x, y, z, w = self.rotation.x, self.rotation.y, self.rotation.z, self.rotation.w

    # The quaternion-to-matrix formula below is only valid for unit
    # quaternions; normalize first so accumulated float drift (or a
    # hand-built, non-unit quaternion) cannot scale or skew the rotation.
    # A (near-)zero quaternion is left untouched, which degrades to the
    # identity rotation exactly as the unnormalized formula did.
    norm = np.sqrt(x * x + y * y + z * z + w * w)
    if norm > 1e-12:
        x, y, z, w = x / norm, y / norm, z / norm, w / norm

    # Build rotation matrix from quaternion using standard formula
    # This avoids numerical issues compared to converting to axis-angle first
    rotation_matrix = np.array(
        [
            [1 - 2 * (y * y + z * z), 2 * (x * y - z * w), 2 * (x * z + y * w)],
            [2 * (x * y + z * w), 1 - 2 * (x * x + z * z), 2 * (y * z - x * w)],
            [2 * (x * z - y * w), 2 * (y * z + x * w), 1 - 2 * (x * x + y * y)],
        ]
    )

    # Build 4x4 homogeneous transformation matrix
    matrix = np.eye(4)
    matrix[:3, :3] = rotation_matrix
    matrix[:3, 3] = [self.translation.x, self.translation.y, self.translation.z]

    return matrix

def lcm_encode(self) -> bytes:
# we get a circular import otherwise
from dimos.msgs.tf2_msgs.TFMessage import TFMessage
Expand Down
4 changes: 4 additions & 0 deletions dimos/msgs/geometry_msgs/Vector3.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,10 @@ def cross(self, other: VectorConvertable | Vector3) -> Vector3:
self.x * other_vector.y - self.y * other_vector.x,
)

def magnitude(self) -> float:
    """Alias for length(): the Euclidean norm of this vector."""
    return self.length()

def length(self) -> float:
    """Compute the Euclidean length (magnitude) of the vector.

    Equivalent to sqrt(x^2 + y^2 + z^2), returned as a plain float.
    """
    squared_sum = self.x * self.x + self.y * self.y + self.z * self.z
    return float(np.sqrt(squared_sum))
Expand Down
11 changes: 8 additions & 3 deletions dimos/msgs/sensor_msgs/Image.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from reactivex.observable import Observable
from reactivex.scheduler import ThreadPoolScheduler

from dimos.types.timestamped import Timestamped, TimestampedBufferCollection
from dimos.types.timestamped import Timestamped, TimestampedBufferCollection, to_human_readable


class ImageFormat(Enum):
Expand Down Expand Up @@ -65,6 +65,9 @@ class Image(Timestamped):
frame_id: str = field(default="")
ts: float = field(default_factory=time.time)

def __str__(self):
    # Human-readable one-liner: image geometry, pixel format, dtype and a
    # readable timestamp (formatted via to_human_readable).
    return f"Image(shape={self.shape}, format={self.format}, dtype={self.dtype}, ts={to_human_readable(self.ts)})"

def __post_init__(self):
"""Validate image data and format."""
if self.data is None:
Expand Down Expand Up @@ -527,8 +530,10 @@ def sharpness_window(target_frequency: float, source: Observable[Image]) -> Obse
def find_best(*argv):
if not window._items:
return None
return max(window._items, key=lambda x: x.sharpness())

found = max(window._items, key=lambda x: x.sharpness())
return found

return rx.interval(1.0 / target_frequency).pipe(
ops.observe_on(thread_scheduler), ops.map(find_best)
ops.observe_on(thread_scheduler), ops.map(find_best), ops.filter(lambda x: x is not None)
)
97 changes: 95 additions & 2 deletions dimos/msgs/sensor_msgs/PointCloud2.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,34 @@ class PointCloud2(Timestamped):
def __init__(
self,
pointcloud: o3d.geometry.PointCloud = None,
frame_id: str = "",
frame_id: str = "world",
ts: Optional[float] = None,
):
self.ts = ts if ts is not None else time.time()
self.ts = ts
self.pointcloud = pointcloud if pointcloud is not None else o3d.geometry.PointCloud()
self.frame_id = frame_id

@classmethod
def from_numpy(
    cls, points: np.ndarray, frame_id: str = "world", timestamp: Optional[float] = None
) -> PointCloud2:
    """Create PointCloud2 from numpy array of shape (N, 3).

    Args:
        points: Nx3 numpy array of 3D points
        frame_id: Frame ID for the point cloud
        timestamp: Timestamp for the point cloud, passed through unchanged.
            NOTE(review): the constructor stores ``ts`` as given, so a None
            timestamp stays None — it is NOT replaced with the current
            time; confirm callers expect that.

    Returns:
        PointCloud2 instance
    """
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    return cls(pointcloud=pcd, ts=timestamp, frame_id=frame_id)

def points(self):
    # Direct accessor for the underlying open3d point container.
    return self.pointcloud.points

# TODO what's the usual storage here? is it already numpy?
def as_numpy(self) -> np.ndarray:
"""Get points as numpy array."""
Expand Down Expand Up @@ -208,6 +229,78 @@ def __len__(self) -> int:
"""Return number of points."""
return len(self.pointcloud.points)

def filter_by_height(
    self,
    min_height: Optional[float] = None,
    max_height: Optional[float] = None,
) -> "PointCloud2":
    """Return a copy of this cloud keeping only points whose z lies in range.

    Points with z below ``min_height`` or above ``max_height`` are dropped;
    a bound set to None is simply not applied. Both bounds are inclusive.
    frame_id and timestamp carry over to the result unchanged.

    Args:
        min_height: Lower z bound (inclusive), or None for no lower bound.
        max_height: Upper z bound (inclusive), or None for no upper bound.

    Returns:
        A new PointCloud2 holding only the surviving points.

    Raises:
        ValueError: If neither bound is given (the call would be a no-op).

    Example:
        # Remove ground points below 0.1m height
        ground_removed = cloud.filter_by_height(min_height=0.1)

        # Keep only points between ground level and 2m height
        band = cloud.filter_by_height(min_height=0.0, max_height=2.0)

        # Remove points above 1.5m (e.g., ceiling)
        no_ceiling = cloud.filter_by_height(max_height=1.5)
    """
    if min_height is None and max_height is None:
        raise ValueError("At least one of min_height or max_height must be specified")

    xyz = self.as_numpy()

    # Nothing to filter in an empty cloud; hand back an empty copy that
    # still carries the original frame and timestamp.
    if len(xyz) == 0:
        return PointCloud2(
            pointcloud=o3d.geometry.PointCloud(),
            frame_id=self.frame_id,
            ts=self.ts,
        )

    # Height is the z-coordinate (column 2); combine whichever bounds
    # were supplied into a single boolean keep-mask.
    z_values = xyz[:, 2]
    keep = np.ones(len(xyz), dtype=bool)
    if min_height is not None:
        keep &= z_values >= min_height
    if max_height is not None:
        keep &= z_values <= max_height

    return PointCloud2.from_numpy(
        points=xyz[keep],
        frame_id=self.frame_id,
        timestamp=self.ts,
    )

def __repr__(self) -> str:
    """String representation.

    Reports point count, frame and timestamp. Uses type(self).__name__ so
    the actual class name (PointCloud2, or a subclass) is shown — the
    previous literal said "PointCloud", which did not match the class.
    """
    return f"{type(self).__name__}(points={len(self)}, frame_id='{self.frame_id}', ts={self.ts})"
7 changes: 6 additions & 1 deletion dimos/perception/detection2d/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from dimos.perception.detection2d.module import Detect2DModule
from dimos.perception.detection2d.module2D import (
Detection2DModule,
)
from dimos.perception.detection2d.module3D import (
Detection3DModule,
)
from dimos.perception.detection2d.utils import *
from dimos.perception.detection2d.yolo_2d_det import *
Loading
Loading