diff --git a/bin/cuda/fix_ort.sh b/bin/cuda/fix_ort.sh new file mode 100755 index 0000000000..182f387364 --- /dev/null +++ b/bin/cuda/fix_ort.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# This script fixes the onnxruntime <--> onnxruntime-gpu package clash +# that occurs when chromadb and other dependencies require the CPU-only +# onnxruntime package. It removes onnxruntime and reinstalls the GPU version. +set -euo pipefail + +: "${GPU_VER:=1.18.1}" + +python - < Observable: """ def process_frame(frame): - # Detect objects - bboxes, track_ids, class_ids, confidences, names, masks = self.detector.process_image( - frame + # TODO: More modular detector output interface + bboxes, track_ids, class_ids, confidences, names, *mask_data = ( + self.detector.process_image(frame) + ([],) + ) + + masks = ( + mask_data[0] + if mask_data and len(mask_data[0]) == len(bboxes) + else [None] * len(bboxes) ) # Create visualization diff --git a/dimos/perception/segmentation/sam_2d_seg.py b/dimos/perception/segmentation/sam_2d_seg.py index f1d32d4daf..d33c7faa0d 100644 --- a/dimos/perception/segmentation/sam_2d_seg.py +++ b/dimos/perception/segmentation/sam_2d_seg.py @@ -51,7 +51,8 @@ def __init__( self.device = device if is_cuda_available(): logger.info("Using CUDA for SAM 2d segmenter") - onnxruntime.preload_dlls(cuda=True, cudnn=True) + if hasattr(onnxruntime, "preload_dlls"): # Handles CUDA 11 / onnxruntime-gpu<=1.18 + onnxruntime.preload_dlls(cuda=True, cudnn=True) self.device = "cuda" else: logger.info("Using CPU for SAM 2d segmenter") diff --git a/dimos/robot/unitree_webrtc/type/lidar.py b/dimos/robot/unitree_webrtc/type/lidar.py index 3b6ab99c93..f45cb8dfe7 100644 --- a/dimos/robot/unitree_webrtc/type/lidar.py +++ b/dimos/robot/unitree_webrtc/type/lidar.py @@ -53,7 +53,7 @@ class LidarMessage(PointCloud2): resolution: float # we lose resolution when encoding PointCloud2 origin: Vector3 raw_msg: Optional[RawLidarMsg] - _costmap: Optional[Costmap] + _costmap: Optional[Costmap] = None def __init__(self, **kwargs): super().__init__( diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index 05725add6f..4eb6a8f247 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -23,8 +23,9 @@ RUN apt-get update && apt-get install -y \ # Configure git to trust any directory (resolves dubious ownership issues in containers) RUN git config --global --add safe.directory '*' -COPY docker/dev/dev-requirements.txt /app/ -RUN --mount=type=cache,target=/root/.cache/pip pip install -r dev-requirements.txt +COPY . /app/ +WORKDIR /app +RUN --mount=type=cache,target=/root/.cache/pip pip install .[dev] # Copy files and add version to motd COPY /assets/dimensionalascii.txt /etc/motd diff --git a/docker/dev/dev-requirements.txt b/docker/dev/dev-requirements.txt deleted file mode 100644 index 9633816cf2..0000000000 --- a/docker/dev/dev-requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -ruff==0.11.10 -mypy==1.15.0 -pre_commit==4.2.0 diff --git a/docker/python/Dockerfile b/docker/python/Dockerfile index f08510faa5..f8d06496b4 100644 --- a/docker/python/Dockerfile +++ b/docker/python/Dockerfile @@ -31,13 +31,13 @@ RUN apt-get install -y \ qtbase5-dev-tools \ supervisor +# Fix distutils-installed packages that block pip upgrades +RUN apt-get purge -y python3-blinker python3-sympy python3-oauthlib || true + RUN mkdir -p /app/dimos -COPY requirements.txt /app/ -COPY base-requirements.txt /app/ +COPY . /app/ WORKDIR /app -RUN --mount=type=cache,target=/root/.cache/pip pip install -r base-requirements.txt - -RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip bash -c "pip install --upgrade 'pip>=24' 'setuptools>=70' 'wheel' 'packaging>=24' && pip install '.[cpu]'" \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 3e68e6f1cd..579f55c29e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=42", "wheel"] +requires = ["setuptools>=70", "wheel"] build-backend = "setuptools.build_meta" [tool.setuptools] @@ -30,15 +30,13 @@ dependencies = [ "lark", "plum-dispatch==2.5.7", "ffmpeg-python", - "pytest", "tiktoken>=0.8.0", "Flask>=2.2", "python-multipart==0.0.20", "reactivex", "rxpy-backpressure @ git+https://github.com/dimensionalOS/rxpy-backpressure.git", - "pytest-asyncio==0.26.0", "asyncio==3.4.3", - "go2-webrtc-connect @ git+https://github.com/legion1581/go2_webrtc_connect.git@fe64abb5987594e8c048427a98445799f6f6a9cc", + "go2-webrtc-connect @ git+https://github.com/dimensionalOS/go2_webrtc_connect.git", # Web Extensions "fastapi>=0.115.6", @@ -73,8 +71,6 @@ dependencies = [ # Vector Embedding "sentence_transformers", - # CTransforms GGUF - "ctransformers[cuda]==0.2.27", # Perception Dependencies "ultralytics>=8.3.70", @@ -82,25 +78,20 @@ dependencies = [ "scipy>=1.15.1", "scikit-learn", "Pillow", - "mmengine>=0.10.3", - "mmcv>=2.1.0", + "clip @ git+https://github.com/openai/CLIP.git", "timm>=1.0.15", "lap>=0.5.12", - "xformers==0.0.20", - - # Detic - "mss", - "dataclasses", - "ftfy", - "regex", - "fasttext", - "lvis", - "nltk", - "clip @ git+https://github.com/openai/CLIP.git", - "detectron2 @ git+https://github.com/facebookresearch/detectron2.git@v0.6", - + # Mapping "open3d", + + # Inference + + "onnx", + + # Multiprocess + "dask[complete]==2025.5.1", + "lcm_msgs @ git+https://github.com/dimensionalOS/python_lcm_msgs.git@main#egg=lcm_msgs" ] [project.optional-dependencies] @@ -124,9 +115,38 @@ manipulation = [ "plotly>=5.9.0", ] +cpu = [ + # CPU inference backends + "onnxruntime", + "ctransformers==0.2.27", +] + cuda = [ "pycuda", - "onnxruntime-gpu[cuda,cudnn]" + "onnxruntime-gpu>=1.17.1,<=1.18.1", # Only versions supporting both cuda11 and cuda12 + "ctransformers[cuda]==0.2.27", + "mmengine>=0.10.3", + "mmcv>=2.1.0", + "xformers==0.0.20", + + # Detic GPU stack + "mss", + "dataclasses", + "ftfy", + "regex", + "fasttext", + "lvis", + "nltk", + "clip @ git+https://github.com/openai/CLIP.git", + "detectron2 @ git+https://github.com/facebookresearch/detectron2.git@v0.6", +] + +dev = [ + "ruff==0.11.10", + "mypy==1.15.0", + "pre_commit==4.2.0", + "pytest", + "pytest-asyncio==0.26.0" ] [tool.ruff] @@ -160,7 +180,6 @@ files = [ [tool.pytest.ini_options] testpaths = ["dimos"] -norecursedirs = ["dimos/robot/unitree/external"] markers = [ "vis: marks tests that run visuals and require a visual check by dev", "benchmark: benchmark, executes something multiple times, calculates avg, prints to console", diff --git a/requirements.txt b/requirements.txt index 6b1029483d..5faa7c8874 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,9 +24,7 @@ reactivex git+https://github.com/dimensionalOS/rxpy-backpressure.git pytest-asyncio==0.26.0 asyncio==3.4.3 --e git+https://github.com/legion1581/go2_webrtc_connect.git@fe64abb5987594e8c048427a98445799f6f6a9cc#egg=go2_webrtc_connect -#-e git+https://github.com/legion1581/aioice.git@ff5755a1e37127411b5fc797c105804db8437445#egg=aioice - +-e git+https://github.com/dimensionalOS/go2_webrtc_connect.git#egg=go2_webrtc_connect # Web Extensions fastapi>=0.115.6 sse-starlette>=2.2.1 @@ -42,15 +40,6 @@ pydantic # Developer Specific ipykernel -# Unitree webrtc streaming -aiortc==1.9.0 -pycryptodome -opencv-python -sounddevice -pyaudio -requests -wasmtime - # Audio openai-whisper soundfile @@ -97,6 +86,7 @@ open3d # Inference (CPU) onnxruntime +onnx # Terminal colors rich==14.0.0 diff --git a/tests/test_object_detection_stream.py b/tests/test_object_detection_stream.py index ed0a64fa9e..1cf8aeab01 100644 --- a/tests/test_object_detection_stream.py +++ b/tests/test_object_detection_stream.py @@ -27,8 +27,6 @@ from dimos.perception.object_detection_stream import ObjectDetectionStream from dimos.types.vector import Vector from dimos.utils.reactive import backpressure -from dimos.perception.detection2d.detic_2d_det import Detic2DDetector - from dotenv import load_dotenv @@ -103,9 +101,6 @@ def main(): class_filter = None # No class filtering web_port = 5555 - # Initialize detector - detector = Detic2DDetector(vocabulary=None, threshold=min_confidence) - # Initialize based on mode if args.mode == "robot": print("Initializing in robot mode...") @@ -166,9 +161,9 @@ def main(): camera_intrinsics=camera_intrinsics, min_confidence=min_confidence, class_filter=class_filter, - detector=detector, video_stream=video_stream, disable_depth=False, + draw_masks=True, ) # Set placeholder robot for cleanup