Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
ecce14e
sam bug fix
alexlin2 Jul 28, 2025
e341512
commiting this before it gets too disgusting
alexlin2 Jul 30, 2025
5a474cf
abomination
alexlin2 Jul 31, 2025
7d24014
abomination 2
alexlin2 Aug 1, 2025
d394a79
fixed the dumbest bug ever, double gradient
alexlin2 Aug 1, 2025
866ec68
fixed local planner and transform bug
alexlin2 Aug 2, 2025
4c6bc04
cleaned up navigator, make set_goal blocking
alexlin2 Aug 4, 2025
05684ba
removed deprecated code to pass tests
alexlin2 Aug 5, 2025
0a5f392
more updates to pass tests
alexlin2 Aug 5, 2025
692b3e2
update spatial memory to use new types
alexlin2 Aug 5, 2025
b98c412
fixed goal checking
alexlin2 Aug 5, 2025
4e6cb79
Replaced costmap type with OccupancyGrid, fixed ros tests
spomichter Aug 6, 2025
9145801
Test revert spatial memory test - CI hanging
spomichter Aug 6, 2025
e32f90b
tests for transform utils
alexlin2 Aug 6, 2025
e2710e0
cleanup and added integration test
alexlin2 Aug 6, 2025
ba0b7f1
bring back spatial memory test and fix bug in connection module
alexlin2 Aug 6, 2025
d112b80
added safe goal selector and fixed bugs
alexlin2 Aug 7, 2025
834c511
Fixed spatial memory CPU constraint with multiple ImageEmbedding init…
spomichter Aug 7, 2025
7aa8d59
added transform util changes to pass tests
alexlin2 Aug 7, 2025
4e653e5
more changes to pass tests
alexlin2 Aug 7, 2025
e30dbb2
more changes to pass tests
alexlin2 Aug 7, 2025
2790953
Only use one SpatialMemory instance in entire test
spomichter Aug 7, 2025
76905a9
pass module tests
alexlin2 Aug 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions dimos/agents/memory/image_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def __init__(self, model_name: str = "clip", dimensions: int = 512):
self.dimensions = dimensions
self.model = None
self.processor = None
self.model_path = None

self._initialize_model()

Expand All @@ -68,10 +69,16 @@ def _initialize_model(self):

if self.model_name == "clip":
model_id = get_data("models_clip") / "model.onnx"
self.model_path = str(model_id) # Store for pickling
processor_id = "openai/clip-vit-base-patch32"
self.model = ort.InferenceSession(model_id)

providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]

self.model = ort.InferenceSession(str(model_id), providers=providers)

actual_providers = self.model.get_providers()
self.processor = CLIPProcessor.from_pretrained(processor_id)
logger.info(f"Loaded CLIP model: {model_id}")
logger.info(f"Loaded CLIP model: {model_id} with providers: {actual_providers}")
elif self.model_name == "resnet":
model_id = "microsoft/resnet-50"
self.model = AutoModel.from_pretrained(model_id)
Expand Down
17 changes: 13 additions & 4 deletions dimos/agents/memory/spatial_vector_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@ class SpatialVectorDB:
"""

def __init__(
self, collection_name: str = "spatial_memory", chroma_client=None, visual_memory=None
self,
collection_name: str = "spatial_memory",
chroma_client=None,
visual_memory=None,
embedding_provider=None,
):
"""
Initialize the spatial vector database.
Expand All @@ -47,6 +51,7 @@ def __init__(
collection_name: Name of the vector database collection
chroma_client: Optional ChromaDB client for persistence. If None, an in-memory client is used.
visual_memory: Optional VisualMemory instance for storing images. If None, a new one is created.
embedding_provider: Optional ImageEmbeddingProvider instance for computing embeddings. If None, one will be created.
"""
self.collection_name = collection_name

Expand Down Expand Up @@ -77,6 +82,9 @@ def __init__(
# Use provided visual memory or create a new one
self.visual_memory = visual_memory if visual_memory is not None else VisualMemory()

# Store the embedding provider to reuse for all operations
self.embedding_provider = embedding_provider

# Log initialization info with details about whether using existing collection
client_type = "persistent" if chroma_client is not None else "in-memory"
try:
Expand Down Expand Up @@ -223,11 +231,12 @@ def query_by_text(self, text: str, limit: int = 5) -> List[Dict]:
Returns:
List of results, each containing the image, its metadata, and similarity score
"""
from dimos.agents.memory.image_embedding import ImageEmbeddingProvider
if self.embedding_provider is None:
from dimos.agents.memory.image_embedding import ImageEmbeddingProvider

embedding_provider = ImageEmbeddingProvider(model_name="clip")
self.embedding_provider = ImageEmbeddingProvider(model_name="clip")

text_embedding = embedding_provider.get_text_embedding(text)
text_embedding = self.embedding_provider.get_text_embedding(text)

results = self.image_collection.query(
query_embeddings=[text_embedding.tolist()],
Expand Down
1 change: 0 additions & 1 deletion dimos/manipulation/manip_aio_processer.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ def __init__(
self.segmenter = None
if self.enable_segmentation:
self.segmenter = Sam2DSegmenter(
device="cuda",
use_tracker=False, # Disable tracker for simple segmentation
use_analyzer=False, # Disable analyzer for simple segmentation
)
Expand Down
1 change: 0 additions & 1 deletion dimos/manipulation/visual_servoing/manipulation_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
select_points_from_depth,
transform_points_3d,
update_target_grasp_pose,
apply_grasp_distance,
is_target_reached,
)
from dimos.utils.transform_utils import (
Expand Down
148 changes: 67 additions & 81 deletions dimos/msgs/nav_msgs/OccupancyGrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@

from __future__ import annotations

import math
import time
from enum import IntEnum
from typing import TYPE_CHECKING, BinaryIO, Optional

import numpy as np
from dimos_lcm.nav_msgs import MapMetaData
from dimos_lcm.nav_msgs import OccupancyGrid as LCMOccupancyGrid
from dimos_lcm.std_msgs import Time as LCMTime
from scipy import ndimage

from dimos.msgs.geometry_msgs import Pose, Vector3, VectorLike
Expand Down Expand Up @@ -79,7 +80,6 @@ def __init__(
frame_id: Reference frame
ts: Timestamp (defaults to current time if 0)
"""
import time

self.frame_id = frame_id
self.ts = ts if ts != 0 else time.time()
Expand Down Expand Up @@ -114,7 +114,6 @@ def __init__(

def _to_lcm_time(self):
"""Convert timestamp to LCM Time."""
from dimos_lcm.std_msgs import Time as LCMTime

s = int(self.ts)
return LCMTime(sec=s, nsec=int((self.ts - s) * 1_000_000_000))
Expand Down Expand Up @@ -174,11 +173,10 @@ def unknown_percent(self) -> float:
"""Percentage of cells that are unknown."""
return (self.unknown_cells / self.total_cells * 100) if self.total_cells > 0 else 0.0

def inflate(self, radius: float, cost_scaling_factor: float = 0.0) -> "OccupancyGrid":
"""Inflate obstacles by a given radius (vectorized).
def inflate(self, radius: float) -> "OccupancyGrid":
"""Inflate obstacles by a given radius (binary inflation).
Args:
radius: Inflation radius in meters
cost_scaling_factor: Factor for decay (0.0 = no decay, binary inflation)
Returns:
New OccupancyGrid with inflated obstacles
"""
Expand All @@ -188,31 +186,18 @@ def inflate(self, radius: float, cost_scaling_factor: float = 0.0) -> "Occupancy
# Get grid as numpy array
grid_array = self.grid

# Create square kernel for binary inflation
# Create circular kernel for binary inflation
kernel_size = 2 * cell_radius + 1
kernel = np.ones((kernel_size, kernel_size), dtype=np.uint8)
y, x = np.ogrid[-cell_radius : cell_radius + 1, -cell_radius : cell_radius + 1]
kernel = (x**2 + y**2 <= cell_radius**2).astype(np.uint8)

# Find occupied cells
occupied_mask = grid_array >= CostValues.OCCUPIED

if cost_scaling_factor == 0.0:
# Binary inflation
inflated = ndimage.binary_dilation(occupied_mask, structure=kernel)
result_grid = grid_array.copy()
result_grid[inflated] = CostValues.OCCUPIED
else:
# Distance-based inflation with decay
# Create distance transform from occupied cells
distance_field = ndimage.distance_transform_edt(~occupied_mask)

# Apply exponential decay based on distance
cost_field = CostValues.OCCUPIED * np.exp(-cost_scaling_factor * distance_field)

# Combine with original grid, keeping higher values
result_grid = np.maximum(grid_array, cost_field).astype(np.int8)

# Ensure occupied cells remain at max value
result_grid[occupied_mask] = CostValues.OCCUPIED
# Binary inflation
inflated = ndimage.binary_dilation(occupied_mask, structure=kernel)
result_grid = grid_array.copy()
result_grid[inflated] = CostValues.OCCUPIED

# Create new OccupancyGrid with inflated data using numpy constructor
return OccupancyGrid(
Expand Down Expand Up @@ -350,7 +335,7 @@ def from_pointcloud(
min_height: float = 0.1,
max_height: float = 2.0,
frame_id: Optional[str] = None,
mark_free_radius: float = 0.0,
mark_free_radius: float = 0.4,
) -> "OccupancyGrid":
"""Create an OccupancyGrid from a PointCloud2 message.

Expand All @@ -367,8 +352,6 @@ def from_pointcloud(
Returns:
OccupancyGrid with occupied cells where points were projected
"""
# Import here to avoid circular dependency
from dimos.msgs.sensor_msgs import PointCloud2

# Get points as numpy array
points = cloud.as_numpy()
Expand All @@ -379,22 +362,26 @@ def from_pointcloud(
width=1, height=1, resolution=resolution, frame_id=frame_id or cloud.frame_id
)

# Filter points by height
height_mask = (points[:, 2] >= min_height) & (points[:, 2] <= max_height)
filtered_points = points[height_mask]
# Filter points by height for obstacles
obstacle_mask = (points[:, 2] >= min_height) & (points[:, 2] <= max_height)
obstacle_points = points[obstacle_mask]

if len(filtered_points) == 0:
# No points in height range
# Get points below min_height for marking as free space
ground_mask = points[:, 2] < min_height
ground_points = points[ground_mask]

# Find bounds of the point cloud in X-Y plane (use all points)
if len(points) > 0:
min_x = np.min(points[:, 0])
max_x = np.max(points[:, 0])
min_y = np.min(points[:, 1])
max_y = np.max(points[:, 1])
else:
# Return empty grid if no points at all
return cls(
width=1, height=1, resolution=resolution, frame_id=frame_id or cloud.frame_id
)

# Find bounds of the point cloud in X-Y plane
min_x = np.min(filtered_points[:, 0])
max_x = np.max(filtered_points[:, 0])
min_y = np.min(filtered_points[:, 1])
max_y = np.max(filtered_points[:, 1])

# Add some padding around the bounds
padding = 1.0 # 1 meter padding
min_x -= padding
Expand All @@ -416,23 +403,36 @@ def from_pointcloud(
# Initialize grid (all unknown)
grid = np.full((height, width), -1, dtype=np.int8)

# Convert points to grid indices
grid_x = ((filtered_points[:, 0] - min_x) / resolution).astype(np.int32)
grid_y = ((filtered_points[:, 1] - min_y) / resolution).astype(np.int32)
# First, mark ground points as free space
if len(ground_points) > 0:
ground_x = ((ground_points[:, 0] - min_x) / resolution).astype(np.int32)
ground_y = ((ground_points[:, 1] - min_y) / resolution).astype(np.int32)

# Clip indices to grid bounds
grid_x = np.clip(grid_x, 0, width - 1)
grid_y = np.clip(grid_y, 0, height - 1)
# Clip indices to grid bounds
ground_x = np.clip(ground_x, 0, width - 1)
ground_y = np.clip(ground_y, 0, height - 1)

# Mark cells as occupied
grid[grid_y, grid_x] = 100 # Lethal obstacle
# Mark ground cells as free
grid[ground_y, ground_x] = 0 # Free space

# Mark free space around obstacles based on mark_free_radius
# Then mark obstacle points (will override ground if at same location)
if len(obstacle_points) > 0:
obs_x = ((obstacle_points[:, 0] - min_x) / resolution).astype(np.int32)
obs_y = ((obstacle_points[:, 1] - min_y) / resolution).astype(np.int32)

# Clip indices to grid bounds
obs_x = np.clip(obs_x, 0, width - 1)
obs_y = np.clip(obs_y, 0, height - 1)

# Mark cells as occupied
grid[obs_y, obs_x] = 100 # Lethal obstacle

# Apply mark_free_radius to expand free space areas
if mark_free_radius > 0:
# Mark a specified radius around occupied cells as free
from scipy.ndimage import binary_dilation
# Expand existing free space areas by the specified radius
# This will NOT expand from obstacles, only from free space

occupied_mask = grid == 100
free_mask = grid == 0 # Current free space
free_radius_cells = int(np.ceil(mark_free_radius / resolution))

# Create circular kernel
Expand All @@ -442,20 +442,11 @@ def from_pointcloud(
]
kernel = x**2 + y**2 <= free_radius_cells**2

known_area = binary_dilation(occupied_mask, structure=kernel, iterations=1)
# Mark non-occupied cells in the known area as free
grid[known_area & (grid != 100)] = 0
else:
# Default: only mark immediate neighbors as free to preserve unknown
from scipy.ndimage import binary_dilation

occupied_mask = grid == 100
# Use a small 3x3 kernel to only mark immediate neighbors
structure = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
immediate_neighbors = binary_dilation(occupied_mask, structure=structure, iterations=1)
# Dilate free space areas
expanded_free = ndimage.binary_dilation(free_mask, structure=kernel, iterations=1)

# Mark only immediate neighbors as free (not the occupied cells themselves)
grid[immediate_neighbors & (grid != 100)] = 0
# Mark expanded areas as free, but don't override obstacles
grid[expanded_free & (grid != 100)] = 0

# Create and return OccupancyGrid
# Get timestamp from cloud if available
Expand All @@ -479,31 +470,28 @@ def gradient(self, obstacle_threshold: int = 50, max_distance: float = 2.0) -> "

Args:
obstacle_threshold: Cell values >= this are considered obstacles (default: 50)
max_distance: Maximum distance to compute gradient in meters (default: 5.0)
max_distance: Maximum distance to compute gradient in meters (default: 2.0)

Returns:
New OccupancyGrid with gradient values:
- -1: Unknown cells far from obstacles (beyond max_distance)
- -1: Unknown cells (preserved as-is)
- 0: Free space far from obstacles
- 1-99: Increasing cost as you approach obstacles
- 100: At obstacles

Note: Unknown cells within max_distance of obstacles will have gradient
values assigned, allowing path planning through unknown areas.
Note: Unknown cells remain as unknown (-1) and do not receive gradient values.
"""

# Remember which cells are unknown
unknown_mask = self.grid == -1

# Create a working grid where unknown cells are treated as free for distance calculation
working_grid = self.grid.copy()
working_grid[unknown_mask] = 0 # Treat unknown as free for gradient computation
unknown_mask = self.grid == CostValues.UNKNOWN

# Create binary obstacle map from working grid
# Create binary obstacle map
# Consider cells >= threshold as obstacles (1), everything else as free (0)
obstacle_map = (working_grid >= obstacle_threshold).astype(np.float32)
# Unknown cells are not considered obstacles for distance calculation
obstacle_map = (self.grid >= obstacle_threshold).astype(np.float32)

# Compute distance transform (distance to nearest obstacle in cells)
# Unknown cells are treated as if they don't exist for distance calculation
distance_cells = ndimage.distance_transform_edt(1 - obstacle_map)

# Convert to meters and clip to max distance
Expand All @@ -515,15 +503,13 @@ def gradient(self, obstacle_threshold: int = 50, max_distance: float = 2.0) -> "
gradient_values = (1 - distance_meters / max_distance) * 100

# Ensure obstacles are exactly 100
gradient_values[obstacle_map > 0] = 100
gradient_values[obstacle_map > 0] = CostValues.OCCUPIED

# Convert to int8 for OccupancyGrid
gradient_data = gradient_values.astype(np.int8)

# Only preserve unknown cells that are beyond max_distance from any obstacle
# This allows gradient to spread through unknown areas near obstacles
far_unknown_mask = unknown_mask & (distance_meters >= max_distance)
gradient_data[far_unknown_mask] = -1
# Preserve unknown cells as unknown (don't apply gradient to them)
gradient_data[unknown_mask] = CostValues.UNKNOWN

# Create new OccupancyGrid with gradient
gradient_grid = OccupancyGrid(
Expand Down
16 changes: 8 additions & 8 deletions dimos/msgs/nav_msgs/test_OccupancyGrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,14 +231,14 @@ def test_gradient():
grid_with_unknown = OccupancyGrid(data_with_unknown, resolution=0.1)
gradient_with_unknown = grid_with_unknown.gradient(max_distance=1.0) # 1m max distance

# Unknown cells close to obstacles should get gradient values
assert gradient_with_unknown.grid[0, 0] != -1 # Should have gradient
assert gradient_with_unknown.grid[1, 1] != -1 # Should have gradient

# Unknown cells far from obstacles (beyond max_distance) should remain unknown
# The far corner (8,8) is ~0.57m from nearest obstacle, within 1m threshold
# So it will get a gradient value, not remain unknown
assert gradient_with_unknown.unknown_cells < 8 # Some unknowns converted to gradient
# Unknown cells should remain unknown (new behavior - unknowns are preserved)
assert gradient_with_unknown.grid[0, 0] == -1 # Should remain unknown
assert gradient_with_unknown.grid[1, 1] == -1 # Should remain unknown
assert gradient_with_unknown.grid[8, 8] == -1 # Should remain unknown
assert gradient_with_unknown.grid[9, 9] == -1 # Should remain unknown

# Unknown cells count should be preserved
assert gradient_with_unknown.unknown_cells == 8 # All unknowns preserved


def test_filter_above():
Expand Down
1 change: 1 addition & 0 deletions dimos/navigation/bt_navigator/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .navigator import BehaviorTreeNavigator
Loading
Loading