Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
cff2e83
Initial working CPU build
spomichter Jul 1, 2025
beff0f1
Unneeded pytest avoid dir
spomichter Jul 3, 2025
b6558e7
Fix costmap check bug
spomichter Jul 4, 2025
98f9c20
Fixed go2 webrtc connect wheel install via pyproject
spomichter Jul 4, 2025
39a8b96
Test CPU build on lenovo tiny desktop
spomichter Jul 4, 2025
41dc52f
Added graceful failure handling for Detic and Metric3d in CPU-only mode
spomichter Jul 7, 2025
c02eee1
Fix onnxruntime/gpu clash and cuda11/12 support
spomichter Jul 8, 2025
806c8a1
Remove unneeded req file
spomichter Jul 8, 2025
b5c81aa
Fix aiortc/aioice dependency conflict ONLY in github runner(?)
spomichter Jul 8, 2025
de50600
Test docker build from python wheel
spomichter Jul 8, 2025
a4fa7f0
Test docker python package editable mode
spomichter Jul 8, 2025
65a2e35
Test setuptools docker install
spomichter Jul 8, 2025
45d03ef
Test normal package install + added pytest to dev requirements
spomichter Jul 8, 2025
82d9669
Test add full dimos source to docker/python
spomichter Jul 8, 2025
2d189a7
Fix PEP 621 metadata bug
spomichter Jul 8, 2025
c37b1e1
Force docker/python rebuild
spomichter Jul 8, 2025
075cc35
Test setuptools>=70
spomichter Jul 8, 2025
1da8385
Fix blinker 1.4 distutils error
spomichter Jul 8, 2025
bfb2fa8
Force packaging>=24 in docker
spomichter Jul 8, 2025
4e2d83d
Pip break system packages flag
spomichter Jul 8, 2025
9c8c382
Fix other distutils packages blocking pip in docker
spomichter Jul 8, 2025
2236383
Docker builds fully working with dimOS CPU package build
spomichter Jul 8, 2025
f8a3f3c
Test no pytest in dev requirements
spomichter Jul 9, 2025
3134793
Add dev deps to 'dev' extras, change docker dev build
spomichter Jul 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions bin/cuda/fix_ort.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Fix the onnxruntime <--> onnxruntime-gpu package clash that occurs when
# chromadb and other dependencies require the CPU-only onnxruntime package.
# The script removes the onnxruntime distribution (if installed) and then
# force-reinstalls onnxruntime-gpu at the version given by GPU_VER.
#
# Usage:
#   bin/cuda/fix_ort.sh                 # installs the default version
#   GPU_VER=1.17.1 bin/cuda/fix_ort.sh  # installs a specific version
set -euo pipefail

# Default the onnxruntime-gpu version when the caller did not provide one, and
# export it so the embedded Python program can read it from the environment.
: "${GPU_VER:=1.18.1}"
export GPU_VER

# The heredoc delimiter is quoted on purpose: the shell performs no expansion
# inside the Python program, so the version string can never be interpreted as
# Python source and any '$' or backtick in the body stays literal.
python - <<'PY'
import importlib.metadata as md
import os
import subprocess
import sys

# Version requested by the wrapping shell script (always set, see above).
gpu_ver = os.environ["GPU_VER"]


def has_dist(name):
    """Return True when a distribution called *name* is installed."""
    try:
        md.version(name)
    except md.PackageNotFoundError:
        return False
    return True


if has_dist("onnxruntime"):
    print("Removing CPU-only onnxruntime wheel …")
    subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "-y", "onnxruntime"])

# --force-reinstall runs unconditionally, so onnxruntime-gpu's files are
# rewritten even when the distribution is already recorded as installed;
# --no-deps keeps pip from touching any other package.
print(f"Reinstalling onnxruntime-gpu=={gpu_ver} …")
subprocess.check_call([
    sys.executable, "-m", "pip", "install",
    "--no-deps", "--force-reinstall", f"onnxruntime-gpu=={gpu_ver}",
])
PY
3 changes: 2 additions & 1 deletion dimos/perception/detection2d/yolo_2d_det.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def __init__(self, model_path="models_yolo", model_name="yolo11n.onnx", device="
module_dir = os.path.dirname(__file__)
self.tracker_config = os.path.join(module_dir, "config", "custom_tracker.yaml")
if is_cuda_available():
onnxruntime.preload_dlls(cuda=True, cudnn=True)
if hasattr(onnxruntime, "preload_dlls"): # Handles CUDA 11 / onnxruntime-gpu<=1.18
onnxruntime.preload_dlls(cuda=True, cudnn=True)
self.device = "cuda"
logger.info("Using CUDA for YOLO 2d detector")
else:
Expand Down
65 changes: 50 additions & 15 deletions dimos/perception/object_detection_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@
from reactivex import operators as ops

from dimos.perception.detection2d.yolo_2d_det import Yolo2DDetector
from dimos.perception.detection2d.detic_2d_det import Detic2DDetector

try:
from dimos.perception.detection2d.detic_2d_det import Detic2DDetector

DETIC_AVAILABLE = True
except (ModuleNotFoundError, ImportError):
DETIC_AVAILABLE = False
Detic2DDetector = None
from dimos.models.depth.metric3d import Metric3D
from dimos.perception.detection2d.utils import (
calculate_depth_from_bbox,
Expand Down Expand Up @@ -83,25 +90,47 @@ def __init__(
self.disable_depth = disable_depth
self.draw_masks = draw_masks
# Initialize object detector
self.detector = detector or Detic2DDetector(vocabulary=None, threshold=min_confidence)
if detector is not None:
self.detector = detector
else:
if DETIC_AVAILABLE:
try:
self.detector = Detic2DDetector(vocabulary=None, threshold=min_confidence)
logger.info("Using Detic2DDetector")
except Exception as e:
logger.warning(
f"Failed to initialize Detic2DDetector: {e}. Falling back to Yolo2DDetector."
)
self.detector = Yolo2DDetector()
else:
logger.info("Detic not available. Using Yolo2DDetector.")
self.detector = Yolo2DDetector()
# Set up camera intrinsics
self.camera_intrinsics = camera_intrinsics

# Initialize depth estimation model
self.depth_model = None
if not disable_depth:
self.depth_model = Metric3D(gt_depth_scale)
try:
self.depth_model = Metric3D(gt_depth_scale)

if camera_intrinsics is not None:
self.depth_model.update_intrinsic(camera_intrinsics)
if camera_intrinsics is not None:
self.depth_model.update_intrinsic(camera_intrinsics)

# Create 3x3 camera matrix for calculations
fx, fy, cx, cy = camera_intrinsics
self.camera_matrix = np.array(
[[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32
)
else:
raise ValueError("camera_intrinsics must be provided")
# Create 3x3 camera matrix for calculations
fx, fy, cx, cy = camera_intrinsics
self.camera_matrix = np.array(
[[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32
)
else:
raise ValueError("camera_intrinsics must be provided")

logger.info("Depth estimation enabled with Metric3D")
except Exception as e:
logger.warning(f"Failed to initialize Metric3D depth model: {e}")
logger.warning("Falling back to disable_depth=True mode")
self.disable_depth = True
self.depth_model = None
else:
logger.info("Depth estimation disabled")

Expand All @@ -123,9 +152,15 @@ def create_stream(self, video_stream: Observable) -> Observable:
"""

def process_frame(frame):
# Detect objects
bboxes, track_ids, class_ids, confidences, names, masks = self.detector.process_image(
frame
# TODO: More modular detector output interface
bboxes, track_ids, class_ids, confidences, names, *mask_data = (
self.detector.process_image(frame) + ([],)
)

masks = (
mask_data[0]
if mask_data and len(mask_data[0]) == len(bboxes)
else [None] * len(bboxes)
)

# Create visualization
Expand Down
3 changes: 2 additions & 1 deletion dimos/perception/segmentation/sam_2d_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ def __init__(
self.device = device
if is_cuda_available():
logger.info("Using CUDA for SAM 2d segmenter")
onnxruntime.preload_dlls(cuda=True, cudnn=True)
if hasattr(onnxruntime, "preload_dlls"): # Handles CUDA 11 / onnxruntime-gpu<=1.18
onnxruntime.preload_dlls(cuda=True, cudnn=True)
self.device = "cuda"
else:
logger.info("Using CPU for SAM 2d segmenter")
Expand Down
2 changes: 1 addition & 1 deletion dimos/robot/unitree_webrtc/type/lidar.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class LidarMessage(PointCloud2):
resolution: float # we lose resolution when encoding PointCloud2
origin: Vector3
raw_msg: Optional[RawLidarMsg]
_costmap: Optional[Costmap]
_costmap: Optional[Costmap] = None

def __init__(self, **kwargs):
super().__init__(
Expand Down
5 changes: 3 additions & 2 deletions docker/dev/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ RUN apt-get update && apt-get install -y \
# Configure git to trust any directory (resolves dubious ownership issues in containers)
RUN git config --global --add safe.directory '*'

COPY docker/dev/dev-requirements.txt /app/
RUN --mount=type=cache,target=/root/.cache/pip pip install -r dev-requirements.txt
COPY . /app/
WORKDIR /app
RUN --mount=type=cache,target=/root/.cache/pip pip install .[dev]

# Copy files and add version to motd
COPY /assets/dimensionalascii.txt /etc/motd
Expand Down
3 changes: 0 additions & 3 deletions docker/dev/dev-requirements.txt

This file was deleted.

10 changes: 5 additions & 5 deletions docker/python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ RUN apt-get install -y \
qtbase5-dev-tools \
supervisor

# Fix distutils-installed packages that block pip upgrades
RUN apt-get purge -y python3-blinker python3-sympy python3-oauthlib || true

RUN mkdir -p /app/dimos

COPY requirements.txt /app/
COPY base-requirements.txt /app/
COPY . /app/

WORKDIR /app

RUN --mount=type=cache,target=/root/.cache/pip pip install -r base-requirements.txt

RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip bash -c "pip install --upgrade 'pip>=24' 'setuptools>=70' 'wheel' 'packaging>=24' && pip install '.[cpu]'"
65 changes: 42 additions & 23 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["setuptools>=42", "wheel"]
requires = ["setuptools>=70", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
Expand Down Expand Up @@ -30,15 +30,13 @@ dependencies = [
"lark",
"plum-dispatch==2.5.7",
"ffmpeg-python",
"pytest",
"tiktoken>=0.8.0",
"Flask>=2.2",
"python-multipart==0.0.20",
"reactivex",
"rxpy-backpressure @ git+https://github.com/dimensionalOS/rxpy-backpressure.git",
"pytest-asyncio==0.26.0",
"asyncio==3.4.3",
"go2-webrtc-connect @ git+https://github.com/legion1581/go2_webrtc_connect.git@fe64abb5987594e8c048427a98445799f6f6a9cc",
"go2-webrtc-connect @ git+https://github.com/dimensionalOS/go2_webrtc_connect.git",

# Web Extensions
"fastapi>=0.115.6",
Expand Down Expand Up @@ -73,34 +71,27 @@ dependencies = [
# Vector Embedding
"sentence_transformers",

# CTransformers GGUF
"ctransformers[cuda]==0.2.27",

# Perception Dependencies
"ultralytics>=8.3.70",
"filterpy>=1.4.5",
"scipy>=1.15.1",
"scikit-learn",
"Pillow",
"mmengine>=0.10.3",
"mmcv>=2.1.0",
"clip @ git+https://github.com/openai/CLIP.git",
"timm>=1.0.15",
"lap>=0.5.12",
"xformers==0.0.20",

# Detic
"mss",
"dataclasses",
"ftfy",
"regex",
"fasttext",
"lvis",
"nltk",
"clip @ git+https://github.com/openai/CLIP.git",
"detectron2 @ git+https://github.com/facebookresearch/detectron2.git@v0.6",


# Mapping
"open3d",

# Inference

"onnx",

# Multiprocess
"dask[complete]==2025.5.1",
"lcm_msgs @ git+https://github.com/dimensionalOS/python_lcm_msgs.git@main#egg=lcm_msgs"
]

[project.optional-dependencies]
Expand All @@ -124,9 +115,38 @@ manipulation = [
"plotly>=5.9.0",
]

cpu = [
# CPU inference backends
"onnxruntime",
"ctransformers==0.2.27",
]

cuda = [
"pycuda",
"onnxruntime-gpu[cuda,cudnn]"
"onnxruntime-gpu>=1.17.1,<=1.18.1", # Only versions supporting both cuda11 and cuda12
"ctransformers[cuda]==0.2.27",
"mmengine>=0.10.3",
"mmcv>=2.1.0",
"xformers==0.0.20",

# Detic GPU stack
"mss",
"dataclasses",
"ftfy",
"regex",
"fasttext",
"lvis",
"nltk",
"clip @ git+https://github.com/openai/CLIP.git",
"detectron2 @ git+https://github.com/facebookresearch/detectron2.git@v0.6",
]

dev = [
"ruff==0.11.10",
"mypy==1.15.0",
"pre_commit==4.2.0",
"pytest",
"pytest-asyncio==0.26.0"
]

[tool.ruff]
Expand Down Expand Up @@ -160,7 +180,6 @@ files = [

[tool.pytest.ini_options]
testpaths = ["dimos"]
norecursedirs = ["dimos/robot/unitree/external"]
markers = [
"vis: marks tests that run visuals and require a visual check by dev",
"benchmark: benchmark, executes something multiple times, calculates avg, prints to console",
Expand Down
14 changes: 2 additions & 12 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ reactivex
git+https://github.com/dimensionalOS/rxpy-backpressure.git
pytest-asyncio==0.26.0
asyncio==3.4.3
-e git+https://github.com/legion1581/go2_webrtc_connect.git@fe64abb5987594e8c048427a98445799f6f6a9cc#egg=go2_webrtc_connect
#-e git+https://github.com/legion1581/aioice.git@ff5755a1e37127411b5fc797c105804db8437445#egg=aioice

-e git+https://github.com/dimensionalOS/go2_webrtc_connect.git#egg=go2_webrtc_connect
# Web Extensions
fastapi>=0.115.6
sse-starlette>=2.2.1
Expand All @@ -42,15 +40,6 @@ pydantic
# Developer Specific
ipykernel

# Unitree webrtc streaming
aiortc==1.9.0
pycryptodome
opencv-python
sounddevice
pyaudio
requests
wasmtime

# Audio
openai-whisper
soundfile
Expand Down Expand Up @@ -97,6 +86,7 @@ open3d

# Inference (CPU)
onnxruntime
onnx

# Terminal colors
rich==14.0.0
Expand Down
7 changes: 1 addition & 6 deletions tests/test_object_detection_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@
from dimos.perception.object_detection_stream import ObjectDetectionStream
from dimos.types.vector import Vector
from dimos.utils.reactive import backpressure
from dimos.perception.detection2d.detic_2d_det import Detic2DDetector

from dotenv import load_dotenv


Expand Down Expand Up @@ -103,9 +101,6 @@ def main():
class_filter = None # No class filtering
web_port = 5555

# Initialize detector
detector = Detic2DDetector(vocabulary=None, threshold=min_confidence)

# Initialize based on mode
if args.mode == "robot":
print("Initializing in robot mode...")
Expand Down Expand Up @@ -166,9 +161,9 @@ def main():
camera_intrinsics=camera_intrinsics,
min_confidence=min_confidence,
class_filter=class_filter,
detector=detector,
video_stream=video_stream,
disable_depth=False,
draw_masks=True,
)

# Set placeholder robot for cleanup
Expand Down