Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
6b6b9b2
local planner integration with new webrtc interface
alexlin2 May 16, 2025
9456080
commenting out cuda stuff
leshy May 22, 2025
e4fc046
commenting out aionice from requirements as an experiment
leshy May 22, 2025
fa4e6e3
local planner working and tested with webrtc interface
alexlin2 Jun 1, 2025
613935f
added local costmap to map type, fixed recovery logic to be more robust
alexlin2 Jun 2, 2025
03e9b82
to be squashed
alexlin2 Jun 6, 2025
54b5236
bug fixes, local planner now is integrated with dev
alexlin2 Jun 7, 2025
700013e
Major refactor, moved all connection types into an abstract class cal…
alexlin2 Jun 7, 2025
875ce57
CI code cleanup
alexlin2 Jun 7, 2025
f669ff1
bug fix and bigger tolerance on angle
alexlin2 Jun 7, 2025
819d912
added a navigation only run file
alexlin2 Jun 7, 2025
b03b14f
added back object detection stream, now without dependency on ros tra…
alexlin2 Jun 8, 2025
39e22c8
added all unitree skills support for webrtc
alexlin2 Jun 9, 2025
c714906
most stupid bug ever
alexlin2 Jun 10, 2025
22d4ab1
bug fix, removed backpressure all together
alexlin2 Jun 10, 2025
9f0cd5a
added local planner changes
alexlin2 Jun 11, 2025
7796752
initial implementation of frontier exploration
alexlin2 Jun 11, 2025
79a0d0f
added qwen frontier predictor
alexlin2 Jun 11, 2025
ca4d9ea
Quick fix Qwen VL Max implementation for single_frame_query
spomichter Jun 11, 2025
ed39c13
Fix CLIP navigation threshold from LLM set to hardcoded
spomichter Jun 11, 2025
d23f9a7
Temporarily comment out non-essential Unitree skills for Cerebras con…
spomichter Jun 11, 2025
1b1f7e4
Integrated Cerebras to run.py
spomichter Jun 11, 2025
f6ca110
local planner integration with new webrtc interface
alexlin2 May 16, 2025
e6cfe33
commenting out cuda stuff
leshy May 22, 2025
ce73101
commenting out aionice from requirements as an experiment
leshy May 22, 2025
79dd910
local planner working and tested with webrtc interface
alexlin2 Jun 1, 2025
31190b9
added local costmap to map type, fixed recovery logic to be more robust
alexlin2 Jun 2, 2025
57b7bca
bug fixes, local planner now is integrated with dev
alexlin2 Jun 7, 2025
923619f
Major refactor, moved all connection types into an abstract class cal…
alexlin2 Jun 7, 2025
f08913d
CI code cleanup
alexlin2 Jun 7, 2025
fe2ffb5
bug fix and bigger tolerance on angle
alexlin2 Jun 7, 2025
1833d42
added a navigation only run file
alexlin2 Jun 7, 2025
f369e7a
added back object detection stream, now without dependency on ros tra…
alexlin2 Jun 8, 2025
7489a96
added all unitree skills support for webrtc
alexlin2 Jun 9, 2025
e3d781b
most stupid bug ever
alexlin2 Jun 10, 2025
d1ba9d9
bug fix, removed backpressure all together
alexlin2 Jun 10, 2025
b7c489f
initial implementation of frontier exploration
alexlin2 Jun 11, 2025
02f54a2
added qwen frontier predictor
alexlin2 Jun 11, 2025
237f365
improved wavefront detector
alexlin2 Jun 11, 2025
1668238
explored skill with frontier exploration fully working, many local pl…
alexlin2 Jun 12, 2025
03e54bf
No need to click start on controller
alexlin2 Jun 12, 2025
21bdafc
Temp changes to Agent, added append_to_history method
spomichter Jun 12, 2025
0a474b3
Created new observe skill, updated observestream
spomichter Jun 12, 2025
94c9274
Working runfile, cerebras updates
spomichter Jun 12, 2025
43b3bcf
bug fix with frontier sizes
alexlin2 Jun 12, 2025
807b874
two tiny bugs
alexlin2 Jun 12, 2025
0d44ce6
fixed frontier detector scoring mechanism
alexlin2 Jun 12, 2025
96d904d
check if attempts exceed maximum
alexlin2 Jun 12, 2025
9257b68
Navigation needs to terminate
alexlin2 Jun 12, 2025
c0d8675
switched back to claude, fixed local planner orientation bug
alexlin2 Jun 21, 2025
38170e0
removed local_costmap from map.py
alexlin2 Jun 21, 2025
1e5fdfc
removed_external_packages
alexlin2 Jun 21, 2025
e77a734
reverted some changes
alexlin2 Jun 21, 2025
fb39de8
got rid of ROS OccupancyGrid in some instances
alexlin2 Jun 21, 2025
bf911e7
use qwen 2.5
alexlin2 Jun 21, 2025
22c0016
put standup in the robot initialization so that the lidar stream does…
alexlin2 Jun 21, 2025
61e73a3
changed voxel_size back to 0.2
alexlin2 Jun 22, 2025
299e408
added tests for frontier exploration
alexlin2 Jun 22, 2025
99a6db9
temp fix for observe stream
alexlin2 Jun 22, 2025
188396a
moved wavefront into its own class
alexlin2 Jun 25, 2025
5774d60
revert changes made to global planner
alexlin2 Jun 25, 2025
684330c
updated all comments
alexlin2 Jun 30, 2025
225ecce
fixed all comments
alexlin2 Jul 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ __pycache__

# Ignore default runtime output folder
/assets/output/
/assets/rgbd_data/
/assets/saved_maps/
/assets/model-cache/
assets/agent/memory.txt
/assets/agent/memory.txt

.bash_history

Expand All @@ -33,3 +35,8 @@ package-lock.json

# Ignore build artifacts
dist/

# Ignore data and modelfiles
data/
FastSAM-x.pt
yolo11n.pt
24 changes: 8 additions & 16 deletions dimos/perception/detection2d/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

import numpy as np
import cv2

from dimos.utils.ros_utils import distance_angle_to_goal_xy
from dimos.types.vector import Vector
from dimos.utils.transform_utils import distance_angle_to_goal_xy


def filter_detections(
Expand Down Expand Up @@ -206,24 +206,19 @@ def plot_results(image, bboxes, track_ids, class_ids, confidences, names, alpha=
return vis_img


def calculate_depth_from_bbox(depth_model, frame, bbox):
def calculate_depth_from_bbox(depth_map, bbox):
"""
Calculate the average depth of an object within a bounding box.
Uses the 25th to 75th percentile range to filter outliers.

Args:
depth_model: Depth model
frame: The image frame
depth_map: The depth map
bbox: Bounding box in format [x1, y1, x2, y2]

Returns:
float: Average depth in meters, or None if depth estimation fails
"""
try:
# Get depth map for the entire frame
depth_map = depth_model.infer_depth(frame)
depth_map = np.array(depth_map)

# Extract region of interest from the depth map
x1, y1, x2, y2 = map(int, bbox)
roi_depth = depth_map[y1:y2, x1:x2]
Expand Down Expand Up @@ -323,7 +318,8 @@ def calculate_position_rotation_from_bbox(bbox, depth, camera_intrinsics):
camera_intrinsics: List [fx, fy, cx, cy] with camera parameters

Returns:
Tuple of (position_dict, rotation_dict)
Vector: position
Vector: rotation
"""
# Calculate distance and angle to object
distance, angle = calculate_distance_angle_from_bbox(bbox, depth, camera_intrinsics)
Expand All @@ -336,11 +332,7 @@ def calculate_position_rotation_from_bbox(bbox, depth, camera_intrinsics):
# For now, rotation is only in yaw (around z-axis)
# We can use the negative of the angle as an estimate of the object's yaw
# assuming objects tend to face the camera
position = {"x": x, "y": y, "z": 0.0} # z=0 assuming objects are on the ground
rotation = {
"roll": 0.0,
"pitch": 0.0,
"yaw": -angle,
} # Only yaw is meaningful with monocular camera
position = Vector([x, y, 0.0])
rotation = Vector([0.0, 0.0, -angle])

return position, rotation
31 changes: 18 additions & 13 deletions dimos/perception/object_detection_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import cv2
import time
import numpy as np
from reactivex import Observable
from reactivex import operators as ops
Expand All @@ -26,8 +27,9 @@
calculate_position_rotation_from_bbox,
)
from dimos.types.vector import Vector
from typing import Optional, Union
from typing import Optional, Union, Callable
from dimos.types.manipulation import ObjectData
from dimos.utils.transform_utils import transform_robot_to_map

from dimos.utils.logging_config import setup_logger

Expand All @@ -54,7 +56,7 @@ def __init__(
gt_depth_scale=1000.0,
min_confidence=0.7,
class_filter=None, # Optional list of class names to filter (e.g., ["person", "car"])
transform_to_map=None, # Optional function to transform coordinates to map frame
get_pose: Callable = None, # Optional function to transform coordinates to map frame
detector: Optional[Union[Detic2DDetector, Yolo2DDetector]] = None,
video_stream: Observable = None,
disable_depth: bool = False, # Flag to disable monocular Metric3D depth estimation
Expand All @@ -69,15 +71,15 @@ def __init__(
gt_depth_scale: Ground truth depth scale for Metric3D
min_confidence: Minimum confidence for detections
class_filter: Optional list of class names to filter
transform_to_map: Optional function to transform pose to map coordinates
get_pose: Optional function to transform pose to map coordinates
detector: Optional detector instance (Detic or Yolo)
video_stream: Observable of video frames to process (if provided, returns a stream immediately)
disable_depth: Flag to disable monocular Metric3D depth estimation
draw_masks: Flag to enable drawing segmentation masks
"""
self.min_confidence = min_confidence
self.class_filter = class_filter
self.transform_to_map = transform_to_map
self.get_pose = get_pose
self.disable_depth = disable_depth
self.draw_masks = draw_masks
# Initialize object detector
Expand Down Expand Up @@ -131,6 +133,11 @@ def process_frame(frame):

# Process detections
objects = []
if not self.disable_depth:
depth_map = self.depth_model.infer_depth(frame)
depth_map = np.array(depth_map)
else:
depth_map = None

for i, bbox in enumerate(bboxes):
# Skip if confidence is too low
Expand All @@ -142,9 +149,9 @@ def process_frame(frame):
if self.class_filter and class_name not in self.class_filter:
continue

if not self.disable_depth:
if not self.disable_depth and depth_map is not None:
# Get depth for this object
depth = calculate_depth_from_bbox(self.depth_model, frame, bbox)
depth = calculate_depth_from_bbox(depth_map, bbox)
if depth is None:
# Skip objects with invalid depth
continue
Expand All @@ -159,13 +166,11 @@ def process_frame(frame):

# Transform to map frame if a transform function is provided
try:
if self.transform_to_map:
position = Vector([position["x"], position["y"], position["z"]])
rotation = Vector(
[rotation["roll"], rotation["pitch"], rotation["yaw"]]
)
position, rotation = self.transform_to_map(
position, rotation, source_frame="base_link"
if self.get_pose:
# position and rotation are already Vector objects, no need to convert
robot_pose = self.get_pose()
position, rotation = transform_robot_to_map(
robot_pose, position, rotation
)
except Exception as e:
logger.error(f"Error transforming to map frame: {e}")
Expand Down
4 changes: 3 additions & 1 deletion dimos/perception/object_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ def track(self, bbox, frame=None, distance=None, size=None):

# Calculate depth only if distance and size not provided
if frame is not None and distance is None and size is None:
depth_estimate = calculate_depth_from_bbox(self.depth_model, frame, bbox)
depth_map = self.depth_model.infer_depth(frame)
depth_map = np.array(depth_map)
depth_estimate = calculate_depth_from_bbox(depth_map, bbox)
if depth_estimate is not None:
print(f"Estimated depth for object: {depth_estimate:.2f}m")

Expand Down
14 changes: 6 additions & 8 deletions dimos/perception/spatial_perception.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,7 @@ def __init__(
"VisualMemory"
] = None, # Optional VisualMemory instance for storing images
video_stream: Optional[Observable] = None, # Video stream to process
transform_provider: Optional[
callable
] = None, # Function that returns position and rotation
get_pose: Optional[callable] = None, # Function that returns position and rotation
):
"""
Initialize the spatial perception system.
Expand Down Expand Up @@ -162,8 +160,8 @@ def __init__(
logger.info(f"SpatialMemory initialized with model {embedding_model}")

# Start processing video stream if provided
if video_stream is not None and transform_provider is not None:
self.start_continuous_processing(video_stream, transform_provider)
if video_stream is not None and get_pose is not None:
self.start_continuous_processing(video_stream, get_pose)

def query_by_location(
self, x: float, y: float, radius: float = 2.0, limit: int = 5
Expand All @@ -183,14 +181,14 @@ def query_by_location(
return self.vector_db.query_by_location(x, y, radius, limit)

def start_continuous_processing(
self, video_stream: Observable, transform_provider: callable
self, video_stream: Observable, get_pose: callable
) -> disposable.Disposable:
"""
Start continuous processing of video frames from an Observable stream.

Args:
video_stream: Observable of video frames
transform_provider: Callable that returns position and rotation for each frame
get_pose: Callable that returns position and rotation for each frame

Returns:
Disposable subscription that can be used to stop processing
Expand All @@ -200,7 +198,7 @@ def start_continuous_processing(

# Map each video frame to include transform data
combined_stream = video_stream.pipe(
ops.map(lambda video_frame: {"frame": video_frame, **transform_provider()}),
ops.map(lambda video_frame: {"frame": video_frame, **get_pose()}),
# Filter out bad transforms
ops.filter(
lambda data: data.get("position") is not None and data.get("rotation") is not None
Expand Down
78 changes: 0 additions & 78 deletions dimos/robot/abstract_robot.py

This file was deleted.

70 changes: 70 additions & 0 deletions dimos/robot/connection_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Copyright 2025 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABC, abstractmethod
from typing import Optional
from reactivex.observable import Observable
from dimos.types.vector import Vector

__all__ = ["ConnectionInterface"]


class ConnectionInterface(ABC):
    """Common contract every robot connection backend must satisfy.

    Concrete transports (ROS, WebRTC, ...) subclass this and provide the
    minimal operations needed to drive the robot and obtain its camera
    feed, so higher-level code can stay transport-agnostic.
    """

    @abstractmethod
    def move(self, velocity: Vector, duration: float = 0.0) -> bool:
        """Drive the robot with a velocity command.

        Args:
            velocity: Velocity vector [x, y, yaw] where:
                x: Forward/backward velocity (m/s)
                y: Left/right velocity (m/s)
                yaw: Rotational velocity (rad/s)
            duration: How long to move (seconds). If 0, command is continuous

        Returns:
            bool: True if command was sent successfully
        """
        ...

    @abstractmethod
    def get_video_stream(self, fps: int = 30) -> Optional[Observable]:
        """Expose the robot camera as an observable frame stream.

        Args:
            fps: Frames per second for the video stream

        Returns:
            Observable: An observable stream of video frames or None if not available
        """
        ...

    @abstractmethod
    def stop(self) -> bool:
        """Halt any in-progress robot motion.

        Returns:
            bool: True if stop command was sent successfully
        """
        ...

    @abstractmethod
    def disconnect(self) -> None:
        """Tear down the connection and release any held resources."""
        ...
1 change: 1 addition & 0 deletions dimos/robot/frontier_exploration/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Re-export the frontier-exploration helpers at package level.
# NOTE: must be an explicit relative import — a bare `from utils import *`
# resolves against sys.path (not this package) under Python 3 and raises
# ModuleNotFoundError unless an unrelated top-level `utils` module exists.
from .utils import *
Loading