Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
ea41664
retry decorator, better vl model query system, json query, bounding b…
leshy Oct 11, 2025
fd7e268
circular import bugfix
leshy Oct 11, 2025
ace0725
better universal json and detection parsing for vlms
leshy Oct 11, 2025
8546465
renamed detections2d to detections
leshy Oct 11, 2025
98e1c24
obsoleted inconvenient detection format entirely
leshy Oct 11, 2025
638d81e
tests cleanup
leshy Oct 11, 2025
b686198
detector grid testing
leshy Oct 12, 2025
75a4abf
yolo person detector cuda
leshy Oct 12, 2025
b6be880
vlm sketch
leshy Oct 12, 2025
8056b24
completely removed dep on old detection format
leshy Oct 12, 2025
e25689c
tests fix, module config fix
leshy Oct 12, 2025
ea238d8
detection3d split into bbox and pc
leshy Oct 12, 2025
928c76c
big detection restructure
leshy Oct 12, 2025
4e82fa9
restructure, mypy
leshy Oct 12, 2025
415eb64
base.py for types, extracted table rendering to utils
leshy Oct 12, 2025
a0a17d6
conftest typing
leshy Oct 12, 2025
fb5f22f
all mypy resolved
leshy Oct 12, 2025
84541f1
session level fixtures
leshy Oct 12, 2025
3d599d1
moondream integrated, generic huggingface model integration
leshy Oct 12, 2025
4d007ea
slightly nicer bounding boxes, slightly better vlm tests
leshy Oct 12, 2025
ce6a923
intelligent annotation font size, model warmup function
leshy Oct 12, 2025
484ae0d
messing with detections
leshy Oct 12, 2025
d0fb0c0
color brightness for from_string
leshy Oct 12, 2025
33df738
mobileclip for reid, transforms for detections
leshy Oct 13, 2025
57d5296
detection reconstruction in another module
leshy Oct 13, 2025
fff177f
reid module, mobileclip
leshy Oct 13, 2025
3771e33
quick person tracker
leshy Oct 14, 2025
342d9af
person tracker cleanup
leshy Oct 14, 2025
8cdc92a
clip/mobileclip standardized implementation
leshy Oct 15, 2025
9e6c6d1
reid experiment
leshy Oct 15, 2025
a2813e8
reid simplification
leshy Oct 15, 2025
7687619
disabling single test for now
leshy Oct 15, 2025
c13395f
removing garbage files
leshy Oct 15, 2025
451b309
correct test naming
leshy Oct 15, 2025
5f810fb
renamed type.py -> base.py for embedding models
leshy Oct 15, 2025
fb95c2f
Merge branch 'dev' into detection-qwen-skills
leshy Oct 15, 2025
9dea5ee
openclip optional, passing tests
leshy Oct 15, 2025
268501d
image backend test skip
leshy Oct 15, 2025
5fdac33
removing .claude
leshy Oct 15, 2025
07761e1
tests fix
leshy Oct 15, 2025
2c5565c
mobile clip optional
leshy Oct 15, 2025
3cd5641
torch reid import issues fix
leshy Oct 16, 2025
a29e156
removing package optionality for now
leshy Oct 16, 2025
338693f
embedding models heavy tests import fix
leshy Oct 16, 2025
8d5c0ae
resolved import issues
leshy Oct 16, 2025
c4ebc93
unified import resolution strategy
leshy Oct 16, 2025
0aa462c
disabled embedding tests for now
leshy Oct 17, 2025
4ca85ec
marking tests as gpu, not heavy
leshy Oct 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .envrc.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Bootstrap nix-direnv (pinned to 3.0.6) if it is not already loaded,
# verifying the downloaded direnvrc against the expected sha256.
if ! has nix_direnv_version || ! nix_direnv_version 3.0.6; then
source_url "https://raw.githubusercontent.com/nix-community/nix-direnv/3.0.6/direnvrc" "sha256-RYcUJaRMf8oF5LznDrlCXbkOQrywm0HDv1VjYGaJGdM="
fi
# Enter the flake's dev shell, then load a .env file if one exists.
use flake .
dotenv_if_exists
2 changes: 2 additions & 0 deletions .envrc.venv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Activate the local Python virtualenv, then load a .env file if one exists.
source env/bin/activate
dotenv_if_exists
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ yolo11n.pt

# symlink one of .envrc.* if you'd like to use
.envrc
.claude
3 changes: 3 additions & 0 deletions data/.lfs/models_mobileclip.tar.gz
Git LFS file not shown
4 changes: 2 additions & 2 deletions data/.lfs/models_yolo.tar.gz
Git LFS file not shown
73 changes: 59 additions & 14 deletions dimos/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import asyncio
import threading

import pytest


Expand All @@ -24,37 +25,81 @@ def event_loop():
loop.close()


# Thread idents alive at session start; excluded from session-leak checks.
# NOTE(review): not populated in this hunk — presumably filled by a
# pytest_sessionstart hook elsewhere in the file; confirm.
_session_threads = set()
# Idents of leaked threads already reported, so each leak fails only the
# first test that created it.
_seen_threads = set()
_seen_threads_lock = threading.RLock()
_before_test_threads = {}  # Map test name to set of thread IDs before test

# Tests carrying any of these markers are exempt from thread-leak monitoring.
_skip_for = ["lcm", "heavy", "ros"]


@pytest.hookimpl(hookwrapper=True)
def pytest_sessionfinish(session):
    """Fail the session if non-main threads are still alive at teardown.

    Must be a hook *wrapper*: the code after ``yield`` runs once the other
    sessionfinish implementations have completed. A plain ``@pytest.hookimpl()``
    containing ``yield`` would make pytest receive a generator object and the
    teardown half below would never execute.
    """
    yield

    # Threads recorded in _session_threads existed before the tests ran and
    # are therefore not considered leaks.
    final_threads = [
        t
        for t in threading.enumerate()
        if t.name != "MainThread" and t.ident not in _session_threads
    ]

    if final_threads:
        thread_info = [f"{t.name} (daemon={t.daemon})" for t in final_threads]
        pytest.fail(
            f"\n{len(final_threads)} thread(s) leaked during test session: {thread_info}\n"
            "Session-scoped fixtures must clean up all threads in their teardown."
        )


@pytest.fixture(autouse=True)
def monitor_threads(request):
    """Fail any test that leaves behind threads it created.

    Snapshots the set of live thread idents before the test body runs and
    compares it with the set afterwards. A thread that appeared during the
    test — and was not already attributed to an earlier test — fails the
    test. Tests carrying one of the markers in _skip_for are exempt.
    """
    # Marked tests (lcm/heavy/ros) manage their own long-lived threads.
    if any(request.node.get_closest_marker(marker) for marker in _skip_for):
        yield
        return

    # Capture the thread idents that exist before the test runs.
    test_name = request.node.nodeid
    with _seen_threads_lock:
        _before_test_threads[test_name] = {
            t.ident for t in threading.enumerate() if t.ident is not None
        }

    yield

    # Only check for threads created BY THIS TEST, not pre-existing ones.
    with _seen_threads_lock:
        before = _before_test_threads.get(test_name, set())
        current = {t.ident for t in threading.enumerate() if t.ident is not None}

        # New threads are ones that exist now but didn't exist before this test.
        new_thread_ids = current - before
        if not new_thread_ids:
            return

        new_threads = [
            t
            for t in threading.enumerate()
            if t.ident in new_thread_ids and t.name != "MainThread"
        ]

        # Drop threads already reported for a previous test so each leak is
        # blamed on the first test that created it.
        truly_new = [t for t in new_threads if t.ident not in _seen_threads]

        # Mark every new thread as seen regardless of whether we fail here.
        for t in new_threads:
            if t.ident is not None:
                _seen_threads.add(t.ident)

    if not truly_new:
        return

    thread_names = [t.name for t in truly_new]
    pytest.fail(
        f"Non-closed threads created during this test. Thread names: {thread_names}. "
        "Please look at the first test that fails and fix that."
    )
30 changes: 30 additions & 0 deletions dimos/models/embedding/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from dimos.models.embedding.base import Embedding, EmbeddingModel

__all__ = [
    "Embedding",
    "EmbeddingModel",
]

# CLIP support is optional: exported only when its dependencies import cleanly.
try:
    from dimos.models.embedding.clip import CLIPEmbedding, CLIPModel
except ImportError:
    pass
else:
    __all__.extend(["CLIPEmbedding", "CLIPModel"])

# MobileCLIP support is optional as well.
try:
    from dimos.models.embedding.mobileclip import MobileCLIPEmbedding, MobileCLIPModel
except ImportError:
    pass
else:
    __all__.extend(["MobileCLIPEmbedding", "MobileCLIPModel"])

# TorchReID support is optional as well.
try:
    from dimos.models.embedding.treid import TorchReIDEmbedding, TorchReIDModel
except ImportError:
    pass
else:
    __all__.extend(["TorchReIDEmbedding", "TorchReIDModel"])
148 changes: 148 additions & 0 deletions dimos/models/embedding/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Copyright 2025 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import time
from abc import ABC, abstractmethod
from typing import Generic, Optional, TypeVar

import numpy as np
import torch

from dimos.msgs.sensor_msgs import Image
from dimos.types.timestamped import Timestamped


class Embedding(Timestamped):
    """Base class for embeddings with vector data.

    Supports both torch.Tensor (for GPU-accelerated comparisons) and np.ndarray.
    Embeddings are kept as torch.Tensor on device by default for efficiency.
    """

    # Raw embedding vector; may live on a GPU device when it is a torch.Tensor.
    vector: torch.Tensor | np.ndarray

    def __init__(self, vector: torch.Tensor | np.ndarray, timestamp: Optional[float] = None):
        """Store the vector and stamp it with `timestamp` or the current time.

        Uses `is not None` (not truthiness) so an explicit timestamp of 0.0
        is honored instead of being silently replaced by time.time().
        """
        self.vector = vector
        self.timestamp = timestamp if timestamp is not None else time.time()

    def __matmul__(self, other: "Embedding") -> float:
        """Dot product of the two vectors via the @ operator.

        This equals cosine similarity only when both vectors are L2-normalized
        (models with normalize=True are expected to produce such vectors).
        """
        if isinstance(self.vector, torch.Tensor):
            # Move the other vector onto our device so the matmul is legal.
            other_tensor = other.to_torch(self.vector.device)
            result = self.vector @ other_tensor
            return result.item()
        return float(self.vector @ other.to_numpy())

    def to_numpy(self) -> np.ndarray:
        """Convert to numpy array (detaches and moves to CPU if needed)."""
        if isinstance(self.vector, torch.Tensor):
            return self.vector.detach().cpu().numpy()
        return self.vector

    def to_torch(self, device: str | torch.device | None = None) -> torch.Tensor:
        """Convert to a torch tensor on the specified device.

        No copy is made when the vector is already a tensor on that device.
        """
        if isinstance(self.vector, np.ndarray):
            tensor = torch.from_numpy(self.vector)
            return tensor.to(device) if device else tensor

        if device is not None and self.vector.device != torch.device(device):
            return self.vector.to(device)
        return self.vector

    def to_cpu(self) -> "Embedding":
        """Move the embedding to CPU in place, returning self for chaining."""
        if isinstance(self.vector, torch.Tensor):
            self.vector = self.vector.cpu()
        return self


E = TypeVar("E", bound="Embedding")


class EmbeddingModel(ABC, Generic[E]):
"""Abstract base class for embedding models supporting vision and language."""

device: str
normalize: bool = True

@abstractmethod
def embed(self, *images: Image) -> E | list[E]:
"""
Embed one or more images.
Returns single Embedding if one image, list if multiple.
"""
pass

@abstractmethod
def embed_text(self, *texts: str) -> E | list[E]:
"""
Embed one or more text strings.
Returns single Embedding if one text, list if multiple.
"""
pass

def compare_one_to_many(self, query: E, candidates: list[E]) -> torch.Tensor:
"""
Efficiently compare one query against many candidates on GPU.

Args:
query: Query embedding
candidates: List of candidate embeddings

Returns:
torch.Tensor of similarities (N,)
"""
query_tensor = query.to_torch(self.device)
candidate_tensors = torch.stack([c.to_torch(self.device) for c in candidates])
return query_tensor @ candidate_tensors.T

def compare_many_to_many(self, queries: list[E], candidates: list[E]) -> torch.Tensor:
"""
Efficiently compare all queries against all candidates on GPU.

Args:
queries: List of query embeddings
candidates: List of candidate embeddings

Returns:
torch.Tensor of similarities (M, N) where M=len(queries), N=len(candidates)
"""
query_tensors = torch.stack([q.to_torch(self.device) for q in queries])
candidate_tensors = torch.stack([c.to_torch(self.device) for c in candidates])
return query_tensors @ candidate_tensors.T

def query(self, query_emb: E, candidates: list[E], top_k: int = 5) -> list[tuple[int, float]]:
"""
Find top-k most similar candidates to query (GPU accelerated).

Args:
query_emb: Query embedding
candidates: List of candidate embeddings
top_k: Number of top results to return

Returns:
List of (index, similarity) tuples sorted by similarity (descending)
"""
similarities = self.compare_one_to_many(query_emb, candidates)
top_values, top_indices = similarities.topk(k=min(top_k, len(candidates)))
return [(idx.item(), val.item()) for idx, val in zip(top_indices, top_values)]

def warmup(self) -> None:
"""Optional warmup method to pre-load model."""
pass
Loading
Loading