Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
*.ico binary
*.pdf binary
# Explicit LFS tracking for test files
tests/data/.lfs/*.tar.gz filter=lfs diff=lfs merge=lfs -text
/data/.lfs/*.tar.gz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text binary
*.mp4 filter=lfs diff=lfs merge=lfs -text binary
*.mov filter=lfs diff=lfs merge=lfs -text binary
Expand Down
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ assets/agent/memory.txt
.bash_history

# Ignore all test data directories but allow compressed files
tests/data/*
!tests/data/.lfs/
/data/*
!/data/.lfs/

# node env (used by devcontainers cli)
node_modules
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
18 changes: 10 additions & 8 deletions dimos/agents/memory/image_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,17 @@
using pre-trained models like CLIP, ResNet, etc.
"""

import base64
import io
import os
import numpy as np
from typing import Union
from PIL import Image
import io

import cv2
import base64
import numpy as np
from PIL import Image

from dimos.utils.data import get_data
from dimos.utils.logging_config import setup_logger
from dimos.utils.testing import testData

logger = setup_logger("dimos.agents.memory.image_embedding")

Expand Down Expand Up @@ -60,12 +62,12 @@ def __init__(self, model_name: str = "clip", dimensions: int = 512):
def _initialize_model(self):
"""Initialize the specified embedding model."""
try:
import torch
from transformers import CLIPProcessor, AutoFeatureExtractor, AutoModel
import onnxruntime as ort
import torch
from transformers import AutoFeatureExtractor, AutoModel, CLIPProcessor

if self.model_name == "clip":
model_id = testData("models_clip") / "model.onnx"
model_id = get_data("models_clip") / "model.onnx"
processor_id = "openai/clip-vit-base-patch32"
self.model = ort.InferenceSession(model_id)
self.processor = CLIPProcessor.from_pretrained(processor_id)
Expand Down
8 changes: 5 additions & 3 deletions dimos/agents/memory/test_image_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@

import os
import time

import numpy as np
import pytest
import reactivex as rx
from reactivex import operators as ops
from dimos.stream.video_provider import VideoProvider

from dimos.agents.memory.image_embedding import ImageEmbeddingProvider
from dimos.stream.video_provider import VideoProvider


class TestImageEmbedding:
Expand All @@ -44,9 +46,9 @@ def test_clip_embedding_initialization(self):
def test_clip_embedding_process_video(self):
"""Test CLIP embedding provider can process video frames and return embeddings."""
try:
from dimos.utils.testing import testData
from dimos.utils.data import get_data

video_path = testData("assets") / "trimmed_video_office.mov"
video_path = get_data("assets") / "trimmed_video_office.mov"

embedding_provider = ImageEmbeddingProvider(model_name="clip", dimensions=512)

Expand Down
10 changes: 6 additions & 4 deletions dimos/perception/detection2d/test_yolo_2d_det.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@

import os
import time
import pytest

import cv2
import numpy as np
import pytest
import reactivex as rx
from reactivex import operators as ops

from dimos.perception.detection2d.yolo_2d_det import Yolo2DDetector
from dimos.stream.video_provider import VideoProvider

Expand All @@ -37,12 +39,12 @@ def test_yolo_detector_initialization(self):
def test_yolo_detector_process_image(self):
"""Test YOLO detector can process video frames and return detection results."""
try:
# Import testData inside method to avoid pytest fixture confusion
from dimos.utils.testing import testData
# Import get_data inside method to avoid pytest fixture confusion
from dimos.utils.data import get_data

detector = Yolo2DDetector()

video_path = testData("assets") / "trimmed_video_office.mov"
video_path = get_data("assets") / "trimmed_video_office.mov"

# Create video provider and directly get a video stream observable
assert os.path.exists(video_path), f"Test video not found: {video_path}"
Expand Down
14 changes: 8 additions & 6 deletions dimos/perception/detection2d/yolo_2d_det.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import cv2
import onnxruntime
from ultralytics import YOLO

from dimos.perception.detection2d.utils import (
extract_detection_results,
plot_results,
filter_detections,
plot_results,
)
import os
import onnxruntime
from dimos.utils.data import get_data
from dimos.utils.gpu_utils import is_cuda_available
from dimos.utils.logging_config import setup_logger
from dimos.utils.path_utils import get_project_root
from dimos.utils.testing import testData
from dimos.utils.gpu_utils import is_cuda_available

logger = setup_logger("dimos.perception.detection2d.yolo_2d_det")

Expand All @@ -40,7 +42,7 @@ def __init__(self, model_path="models_yolo", model_name="yolo11n.onnx", device="
device (str): Device to run inference on ('cuda' or 'cpu')
"""
self.device = device
self.model = YOLO(testData(model_path) / model_name)
self.model = YOLO(get_data(model_path) / model_name)

module_dir = os.path.dirname(__file__)
self.tracker_config = os.path.join(module_dir, "config", "custom_tracker.yaml")
Expand Down
22 changes: 12 additions & 10 deletions dimos/perception/segmentation/sam_2d_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import cv2
import os
import time
from collections import deque
from concurrent.futures import ThreadPoolExecutor

import cv2
import onnxruntime
from ultralytics import FastSAM

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why all of these import changes for no reason?

from dimos.perception.common.detection2d_tracker import get_tracked_results, target2dTracker
from dimos.perception.segmentation.image_analyzer import ImageAnalyzer
from dimos.perception.segmentation.utils import (
crop_images_from_bboxes,
extract_masks_bboxes_probs_names,
filter_segmentation_results,
plot_results,
crop_images_from_bboxes,
)
from dimos.utils.data import get_data
from dimos.utils.gpu_utils import is_cuda_available
from dimos.perception.common.detection2d_tracker import target2dTracker, get_tracked_results
from dimos.perception.segmentation.image_analyzer import ImageAnalyzer
import os
from collections import deque
from concurrent.futures import ThreadPoolExecutor
from dimos.utils.logging_config import setup_logger
from dimos.utils.path_utils import get_project_root
import onnxruntime
from dimos.utils.testing import testData

logger = setup_logger("dimos.perception.segmentation.sam_2d_seg")

Expand All @@ -55,7 +57,7 @@ def __init__(
logger.info("Using CPU for SAM 2d segmenter")
self.device = "cpu"
# Core components
self.model = FastSAM(testData(model_path) / model_name)
self.model = FastSAM(get_data(model_path) / model_name)
self.use_tracker = use_tracker
self.use_analyzer = use_analyzer
self.use_rich_labeling = use_rich_labeling
Expand Down
14 changes: 8 additions & 6 deletions dimos/perception/segmentation/test_sam_2d_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@

import os
import time
from dimos.stream import video_provider
import pytest

import cv2
import numpy as np
import pytest
import reactivex as rx
from reactivex import operators as ops
from dimos.stream.video_provider import VideoProvider

from dimos.perception.segmentation.sam_2d_seg import Sam2DSegmenter
from dimos.perception.segmentation.utils import extract_masks_bboxes_probs_names
from dimos.stream import video_provider
from dimos.stream.video_provider import VideoProvider


class TestSam2DSegmenter:
Expand All @@ -39,11 +41,11 @@ def test_sam_segmenter_initialization(self):

def test_sam_segmenter_process_image(self):
"""Test FastSAM segmenter can process video frames and return segmentation masks."""
# Import testData inside method to avoid pytest fixture confusion
from dimos.utils.testing import testData
# Import get_data inside method to avoid pytest fixture confusion
from dimos.utils.data import get_data

# Get test video path directly
video_path = testData("assets") / "trimmed_video_office.mov"
video_path = get_data("assets") / "trimmed_video_office.mov"
try:
# Initialize segmenter without analyzer for faster testing
segmenter = Sam2DSegmenter(use_analyzer=False)
Expand Down
16 changes: 8 additions & 8 deletions dimos/perception/test_spatial_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@
# limitations under the License.

import os
import time
import shutil
import tempfile
import pytest
import numpy as np
import time

import cv2
import shutil
import numpy as np
Copy link
Contributor

@spomichter spomichter Jun 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The import changes are pretty confusing to read, @leshy. I don't know why the diff does this.

import pytest
import reactivex as rx
from reactivex import Observable
from reactivex import operators as ops
from reactivex.subject import Subject
from reactivex import Observable

from dimos.perception.spatial_perception import SpatialMemory
from dimos.types.position import Position
from dimos.stream.video_provider import VideoProvider
from dimos.types.position import Position
from dimos.types.vector import Vector
Expand Down Expand Up @@ -101,9 +101,9 @@ def test_spatial_memory_processing(self, temp_dir):
min_time_threshold=0.01,
)

from dimos.utils.testing import testData
from dimos.utils.data import get_data

video_path = testData("assets") / "trimmed_video_office.mov"
video_path = get_data("assets") / "trimmed_video_office.mov"
assert os.path.exists(video_path), f"Test video not found: {video_path}"
video_provider = VideoProvider(dev_name="test_video", video_source=video_path)
video_stream = video_provider.capture_video_as_observable(realtime=False, fps=15)
Expand Down
Loading