diff --git a/dimos/hardware/camera.py b/dimos/hardware/camera.py deleted file mode 100644 index 07c74ce508..0000000000 --- a/dimos/hardware/camera.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dimos.hardware.sensor import AbstractSensor - - -class Camera(AbstractSensor): - def __init__(self, resolution=None, focal_length=None, sensor_size=None, sensor_type="Camera"): - super().__init__(sensor_type) - self.resolution = resolution # (width, height) in pixels - self.focal_length = focal_length # in millimeters - self.sensor_size = sensor_size # (width, height) in millimeters - - def get_sensor_type(self): - return self.sensor_type - - def calculate_intrinsics(self): - if not self.resolution or not self.focal_length or not self.sensor_size: - raise ValueError("Resolution, focal length, and sensor size must be provided") - - # Calculate pixel size - pixel_size_x = self.sensor_size[0] / self.resolution[0] - pixel_size_y = self.sensor_size[1] / self.resolution[1] - - # Calculate the principal point (assuming it's at the center of the image) - principal_point_x = self.resolution[0] / 2 - principal_point_y = self.resolution[1] / 2 - - # Calculate the focal length in pixels - focal_length_x = self.focal_length / pixel_size_x - focal_length_y = self.focal_length / pixel_size_y - - return { - "focal_length_x": focal_length_x, - "focal_length_y": focal_length_y, - "principal_point_x": principal_point_x, - "principal_point_y": principal_point_y, - } - - def get_intrinsics(self): - return self.calculate_intrinsics() diff --git a/dimos/hardware/camera/zed/__init__.py b/dimos/hardware/camera/zed/__init__.py new file mode 100644 index 0000000000..3c39045606 --- /dev/null +++ b/dimos/hardware/camera/zed/__init__.py @@ -0,0 +1,55 @@ +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""ZED camera hardware interfaces.""" + +from pathlib import Path +from dimos.msgs.sensor_msgs.CameraInfo import CalibrationProvider + +# Check if ZED SDK is available +try: + import pyzed.sl as sl + + HAS_ZED_SDK = True +except ImportError: + HAS_ZED_SDK = False + +# Only import ZED classes if SDK is available +if HAS_ZED_SDK: + from dimos.hardware.camera.zed.camera import ZEDCamera, ZEDModule +else: + # Provide stub classes when SDK is not available + class ZEDCamera: + def __init__(self, *args, **kwargs): + raise ImportError( + "ZED SDK not installed. Please install pyzed package to use ZED camera functionality." + ) + + class ZEDModule: + def __init__(self, *args, **kwargs): + raise ImportError( + "ZED SDK not installed. Please install pyzed package to use ZED camera functionality." + ) + + +# Set up camera calibration provider (always available) +CALIBRATION_DIR = Path(__file__).parent +CameraInfo = CalibrationProvider(CALIBRATION_DIR) + +__all__ = [ + "ZEDCamera", + "ZEDModule", + "HAS_ZED_SDK", + "CameraInfo", +] diff --git a/dimos/hardware/zed_camera.py b/dimos/hardware/camera/zed/camera.py similarity index 98% rename from dimos/hardware/zed_camera.py rename to dimos/hardware/camera/zed/camera.py index 7f11dc78af..6822c19afb 100644 --- a/dimos/hardware/zed_camera.py +++ b/dimos/hardware/camera/zed/camera.py @@ -12,38 +12,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np +from typing import Any, Dict, Optional, Tuple + import cv2 +import numpy as np import open3d as o3d -from typing import Optional, Tuple, Dict, Any -import logging -import time -import threading +import pyzed.sl as sl +from dimos_lcm.sensor_msgs import CameraInfo from reactivex import interval -from reactivex import operators as ops - -try: - import pyzed.sl as sl -except ImportError: - sl = None - logging.warning("ZED SDK not found. Please install pyzed to use ZED camera functionality.") -from dimos.hardware.stereo_camera import StereoCamera from dimos.core import Module, Out, rpc -from dimos.utils.logging_config import setup_logger -from dimos.protocol.tf import TF -from dimos.msgs.geometry_msgs import Transform, Vector3, Quaternion +from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Vector3 # Import LCM message types from dimos.msgs.sensor_msgs import Image, ImageFormat -from dimos_lcm.sensor_msgs import CameraInfo -from dimos.msgs.geometry_msgs import PoseStamped from dimos.msgs.std_msgs import Header +from dimos.protocol.tf import TF +from dimos.utils.logging_config import setup_logger logger = setup_logger(__name__) -class ZEDCamera(StereoCamera): +class ZEDCamera: """ZED Camera capture node with neural depth processing.""" def __init__( diff --git a/dimos/hardware/camera/zed/single_webcam.yaml b/dimos/hardware/camera/zed/single_webcam.yaml new file mode 100644 index 0000000000..1ce9457559 --- /dev/null +++ b/dimos/hardware/camera/zed/single_webcam.yaml @@ -0,0 +1,27 @@ +# for cv2.VideoCapture and cutting only half of the frame +image_width: 640 +image_height: 376 +camera_name: zed_webcam_single +camera_matrix: + rows: 3 + cols: 3 + data: [379.45267, 0. , 302.43516, + 0. , 380.67871, 228.00954, + 0. , 0. , 1. ] +distortion_model: plumb_bob +distortion_coefficients: + rows: 1 + cols: 5 + data: [-0.309435, 0.092185, -0.009059, 0.003708, 0.000000] +rectification_matrix: + rows: 3 + cols: 3 + data: [1., 0., 0., + 0., 1., 0., + 0., 0., 1.] +projection_matrix: + rows: 3 + cols: 4 + data: [291.12888, 0. , 304.94086, 0. , + 0. , 347.95022, 231.8885 , 0. , + 0. , 0. , 1. , 0. ] diff --git a/dimos/hardware/camera/zed/test_zed.py b/dimos/hardware/camera/zed/test_zed.py new file mode 100644 index 0000000000..ce1bef0b54 --- /dev/null +++ b/dimos/hardware/camera/zed/test_zed.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +# Copyright 2025 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo + + +def test_zed_import_and_calibration_access(): + """Test that zed module can be imported and calibrations accessed.""" + # Import zed module from camera + from dimos.hardware.camera import zed + + # Test that CameraInfo is accessible + assert hasattr(zed, "CameraInfo") + + # Test snake_case access + camera_info_snake = zed.CameraInfo.single_webcam + assert isinstance(camera_info_snake, CameraInfo) + assert camera_info_snake.width == 640 + assert camera_info_snake.height == 376 + assert camera_info_snake.distortion_model == "plumb_bob" + + # Test PascalCase access + camera_info_pascal = zed.CameraInfo.SingleWebcam + assert isinstance(camera_info_pascal, CameraInfo) + assert camera_info_pascal.width == 640 + assert camera_info_pascal.height == 376 + + # Verify both access methods return the same cached object + assert camera_info_snake is camera_info_pascal + + print("✓ ZED import and calibration access test passed!") diff --git a/dimos/hardware/interface.py b/dimos/hardware/interface.py deleted file mode 100644 index 9d7797a569..0000000000 --- a/dimos/hardware/interface.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dimos.hardware.end_effector import EndEffector -from dimos.hardware.camera import Camera -from dimos.hardware.stereo_camera import StereoCamera -from dimos.hardware.ufactory import UFactory7DOFArm - - -class HardwareInterface: - def __init__( - self, - end_effector: EndEffector = None, - sensors: list = None, - arm_architecture: UFactory7DOFArm = None, - ): - self.end_effector = end_effector - self.sensors = sensors if sensors is not None else [] - self.arm_architecture = arm_architecture - - def get_configuration(self): - """Return the current hardware configuration.""" - return { - "end_effector": self.end_effector, - "sensors": [sensor.get_sensor_type() for sensor in self.sensors], - "arm_architecture": self.arm_architecture, - } - - def set_configuration(self, configuration): - """Set the hardware configuration.""" - self.end_effector = configuration.get("end_effector", self.end_effector) - self.sensors = configuration.get("sensors", self.sensors) - self.arm_architecture = configuration.get("arm_architecture", self.arm_architecture) - - def add_sensor(self, sensor): - """Add a sensor to the hardware interface.""" - if isinstance(sensor, (Camera, StereoCamera)): - self.sensors.append(sensor) - else: - raise ValueError("Sensor must be a Camera or StereoCamera instance.") diff --git a/dimos/hardware/stereo_camera.py b/dimos/hardware/stereo_camera.py deleted file mode 100644 index 4ffdc51811..0000000000 --- a/dimos/hardware/stereo_camera.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dimos.hardware.camera import Camera - - -class StereoCamera(Camera): - def __init__(self, baseline=None, **kwargs): - super().__init__(**kwargs) - self.baseline = baseline - - def get_intrinsics(self): - intrinsics = super().get_intrinsics() - intrinsics["baseline"] = self.baseline - return intrinsics diff --git a/dimos/hardware/webcam.py b/dimos/hardware/webcam.py index c45cb59515..d7413ccb1b 100644 --- a/dimos/hardware/webcam.py +++ b/dimos/hardware/webcam.py @@ -18,7 +18,7 @@ from abc import ABC, abstractmethod, abstractproperty from dataclasses import dataclass, field from functools import cache -from typing import Any, Callable, Generic, Optional, Protocol, TypeVar, Literal +from typing import Any, Callable, Generic, Literal, Optional, Protocol, TypeVar import cv2 import numpy as np @@ -153,12 +153,11 @@ def capture_frame(self) -> Image: image = Image.from_numpy( frame_rgb, format=ImageFormat.RGB, # We converted to RGB above - frame_id=self._frame("camera"), # Standard frame ID for camera images + frame_id=self._frame("camera_optical"), # Standard frame ID for camera images ts=time.time(), # Current timestamp ) if self.config.stereo_slice in ("left", "right"): - image.frame_id = self._frame(f"camera_stereo_{self.config.stereo_slice}") half_width = image.width // 2 if self.config.stereo_slice == "left": image = image.crop(0, 0, half_width, image.height) diff --git a/dimos/msgs/sensor_msgs/CameraInfo.py b/dimos/msgs/sensor_msgs/CameraInfo.py index 70a99e35ec..5ce0f76353 100644 --- a/dimos/msgs/sensor_msgs/CameraInfo.py +++ b/dimos/msgs/sensor_msgs/CameraInfo.py @@ -26,8 +26,8 @@ # Import ROS types try: from sensor_msgs.msg import CameraInfo as ROSCameraInfo - from std_msgs.msg import Header as ROSHeader from sensor_msgs.msg import RegionOfInterest as ROSRegionOfInterest + from std_msgs.msg import Header as ROSHeader ROS_AVAILABLE = True except ImportError: @@ -98,6 +98,51 @@ def __init__( self.roi_width = 0 self.roi_do_rectify = False + @classmethod + def from_yaml(cls, yaml_file: str) -> CameraInfo: + """Create CameraInfo from YAML file. + + Args: + yaml_file: Path to YAML file containing camera calibration data + + Returns: + CameraInfo instance with loaded calibration data + """ + import yaml + + with open(yaml_file, "r") as f: + data = yaml.safe_load(f) + + # Extract basic parameters + width = data.get("image_width", 0) + height = data.get("image_height", 0) + distortion_model = data.get("distortion_model", "") + + # Extract matrices + camera_matrix = data.get("camera_matrix", {}) + K = camera_matrix.get("data", [0.0] * 9) + + distortion_coeffs = data.get("distortion_coefficients", {}) + D = distortion_coeffs.get("data", []) + + rect_matrix = data.get("rectification_matrix", {}) + R = rect_matrix.get("data", [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0]) + + proj_matrix = data.get("projection_matrix", {}) + P = proj_matrix.get("data", [0.0] * 12) + + # Create CameraInfo instance + return cls( + height=height, + width=width, + distortion_model=distortion_model, + D=D, + K=K, + R=R, + P=P, + frame_id="camera_optical", + ) + def get_K_matrix(self) -> np.ndarray: """Get intrinsic matrix as numpy array.""" return np.array(self.K, dtype=np.float64).reshape(3, 3) @@ -326,3 +371,103 @@ def __eq__(self, other) -> bool: and self.binning_y == other.binning_y and self.frame_id == other.frame_id ) + + +class CalibrationProvider: + """Provides lazy-loaded access to camera calibration YAML files in a directory.""" + + def __init__(self, calibration_dir): + """Initialize with a directory containing calibration YAML files. + + Args: + calibration_dir: Path to directory containing .yaml calibration files + """ + from pathlib import Path + + self._calibration_dir = Path(calibration_dir) + self._cache = {} + + def _to_snake_case(self, name: str) -> str: + """Convert PascalCase to snake_case.""" + import re + + # Insert underscore before capital letters (except first char) + s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) + # Insert underscore before capital letter followed by lowercase + return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower() + + def _find_yaml_file(self, name: str): + """Find YAML file matching the given name (tries both snake_case and exact match). + + Args: + name: Attribute name to look for + + Returns: + Path to YAML file if found, None otherwise + """ + # Try exact match first + yaml_file = self._calibration_dir / f"{name}.yaml" + if yaml_file.exists(): + return yaml_file + + # Try snake_case conversion for PascalCase names + snake_name = self._to_snake_case(name) + if snake_name != name: + yaml_file = self._calibration_dir / f"{snake_name}.yaml" + if yaml_file.exists(): + return yaml_file + + return None + + def __getattr__(self, name: str) -> CameraInfo: + """Load calibration YAML file on first access. + + Supports both snake_case and PascalCase attribute names. + For example, both 'single_webcam' and 'SingleWebcam' will load 'single_webcam.yaml'. + + Args: + name: Attribute name (can be PascalCase or snake_case) + + Returns: + CameraInfo object loaded from the YAML file + + Raises: + AttributeError: If no matching YAML file exists + """ + # Check cache first + if name in self._cache: + return self._cache[name] + + # Also check if the snake_case version is cached (for PascalCase access) + snake_name = self._to_snake_case(name) + if snake_name != name and snake_name in self._cache: + return self._cache[snake_name] + + # Find matching YAML file + yaml_file = self._find_yaml_file(name) + if not yaml_file: + raise AttributeError(f"No calibration file found for: {name}") + + # Load and cache the CameraInfo + camera_info = CameraInfo.from_yaml(str(yaml_file)) + + # Cache both the requested name and the snake_case version + self._cache[name] = camera_info + if snake_name != name: + self._cache[snake_name] = camera_info + + return camera_info + + def __dir__(self): + """List available calibrations in both snake_case and PascalCase.""" + calibrations = [] + if self._calibration_dir.exists() and self._calibration_dir.is_dir(): + yaml_files = self._calibration_dir.glob("*.yaml") + for f in yaml_files: + stem = f.stem + calibrations.append(stem) + # Add PascalCase version + pascal = "".join(word.capitalize() for word in stem.split("_")) + if pascal != stem: + calibrations.append(pascal) + return calibrations diff --git a/dimos/msgs/sensor_msgs/test_CameraInfo.py b/dimos/msgs/sensor_msgs/test_CameraInfo.py index 0c755f74f5..fe4076a325 100644 --- a/dimos/msgs/sensor_msgs/test_CameraInfo.py +++ b/dimos/msgs/sensor_msgs/test_CameraInfo.py @@ -13,9 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest import numpy as np - +import pytest try: from sensor_msgs.msg import CameraInfo as ROSCameraInfo @@ -26,13 +25,8 @@ ROSRegionOfInterest = None ROSHeader = None -from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo - -# Try to import ROS types for testing -try: - ROS_AVAILABLE = True -except ImportError: - ROS_AVAILABLE = False +from dimos.msgs.sensor_msgs.CameraInfo import CalibrationProvider, CameraInfo +from dimos.utils.path_utils import get_project_root def test_lcm_encode_decode(): @@ -196,10 +190,6 @@ def test_numpy_matrix_operations(): @pytest.mark.ros def test_ros_conversion(): """Test ROS message conversion preserves CameraInfo data.""" - if not ROS_AVAILABLE: - print("\nROS packages not available - skipping ROS conversion test") - return - print("\nTesting ROS CameraInfo conversion...") # Create test camera info @@ -401,9 +391,68 @@ def test_equality(): print("✓ Equality comparison works correctly") -if __name__ == "__main__": - test_lcm_encode_decode() - test_numpy_matrix_operations() - test_ros_conversion() - test_equality() - print("\n✓✓✓ All CameraInfo tests passed! ✓✓✓") +def test_camera_info_from_yaml(): + """Test loading CameraInfo from YAML file.""" + + # Get path to the single webcam YAML file + yaml_path = get_project_root() / "dimos" / "hardware" / "camera" / "zed" / "single_webcam.yaml" + + # Load CameraInfo from YAML + camera_info = CameraInfo.from_yaml(str(yaml_path)) + + # Verify loaded values + assert camera_info.width == 640 + assert camera_info.height == 376 + assert camera_info.distortion_model == "plumb_bob" + assert camera_info.frame_id == "camera_optical" + + # Check camera matrix K + K = camera_info.get_K_matrix() + assert K.shape == (3, 3) + assert np.isclose(K[0, 0], 379.45267) # fx + assert np.isclose(K[1, 1], 380.67871) # fy + assert np.isclose(K[0, 2], 302.43516) # cx + assert np.isclose(K[1, 2], 228.00954) # cy + + # Check distortion coefficients + D = camera_info.get_D_coeffs() + assert len(D) == 5 + assert np.isclose(D[0], -0.309435) + + # Check projection matrix P + P = camera_info.get_P_matrix() + assert P.shape == (3, 4) + assert np.isclose(P[0, 0], 291.12888) + + print("✓ CameraInfo loaded successfully from YAML file") + + +def test_calibration_provider(): + """Test CalibrationProvider lazy loading of YAML files.""" + # Get the directory containing calibration files (not the file itself) + calibration_dir = get_project_root() / "dimos" / "hardware" / "camera" / "zed" + + # Create CalibrationProvider instance + Calibrations = CalibrationProvider(calibration_dir) + + # Test lazy loading of single_webcam.yaml using snake_case + camera_info = Calibrations.single_webcam + assert isinstance(camera_info, CameraInfo) + assert camera_info.width == 640 + assert camera_info.height == 376 + + # Test PascalCase access to same calibration + camera_info2 = Calibrations.SingleWebcam + assert isinstance(camera_info2, CameraInfo) + assert camera_info2.width == 640 + assert camera_info2.height == 376 + + # Test caching - both access methods should return same object + assert camera_info is camera_info2 # Same object reference + + # Test __dir__ lists available calibrations in both cases + available = dir(Calibrations) + assert "single_webcam" in available + assert "SingleWebcam" in available + + print("✓ CalibrationProvider test passed with both naming conventions!") diff --git a/dimos/perception/semantic_seg.py b/dimos/perception/semantic_seg.py deleted file mode 100644 index 6626e3bc9f..0000000000 --- a/dimos/perception/semantic_seg.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dimos.perception.segmentation import Sam2DSegmenter -from dimos.models.depth.metric3d import Metric3D -from dimos.hardware.camera import Camera -from reactivex import Observable -from reactivex import operators as ops -from dimos.types.segmentation import SegmentationType -import numpy as np -import cv2 - - -class SemanticSegmentationStream: - def __init__( - self, - enable_mono_depth: bool = True, - enable_rich_labeling: bool = True, - camera_params: dict = None, - gt_depth_scale=256.0, - ): - """ - Initialize a semantic segmentation stream using Sam2DSegmenter. - - Args: - enable_mono_depth: Whether to enable monocular depth processing - enable_rich_labeling: Whether to enable rich labeling - camera_params: Dictionary containing either: - - Direct intrinsics: [fx, fy, cx, cy] - - Physical parameters: resolution, focal_length, sensor_size - """ - self.segmenter = Sam2DSegmenter( - min_analysis_interval=5.0, - use_tracker=True, - use_analyzer=True, - use_rich_labeling=enable_rich_labeling, - ) - - self.enable_mono_depth = enable_mono_depth - if enable_mono_depth: - self.depth_model = Metric3D(gt_depth_scale) - - if camera_params: - # Check if direct intrinsics are provided - if "intrinsics" in camera_params: - intrinsics = camera_params["intrinsics"] - if len(intrinsics) != 4: - raise ValueError("Intrinsics must be a list of 4 values: [fx, fy, cx, cy]") - self.depth_model.update_intrinsic(intrinsics) - else: - # Create camera object and calculate intrinsics from physical parameters - self.camera = Camera( - resolution=camera_params.get("resolution"), - focal_length=camera_params.get("focal_length"), - sensor_size=camera_params.get("sensor_size"), - ) - intrinsics = self.camera.calculate_intrinsics() - self.depth_model.update_intrinsic( - [ - intrinsics["focal_length_x"], - intrinsics["focal_length_y"], - intrinsics["principal_point_x"], - intrinsics["principal_point_y"], - ] - ) - else: - raise ValueError("Camera parameters are required for monocular depth processing.") - - def create_stream(self, video_stream: Observable) -> Observable[SegmentationType]: - """ - Create an Observable stream of segmentation results from a video stream. - - Args: - video_stream: Observable that emits video frames - - Returns: - Observable that emits SegmentationType objects containing masks and metadata - """ - - def process_frame(frame): - # Process image and get results - masks, bboxes, target_ids, probs, names = self.segmenter.process_image(frame) - - # Run analysis if enabled - if self.segmenter.use_analyzer: - self.segmenter.run_analysis(frame, bboxes, target_ids) - names = self.segmenter.get_object_names(target_ids, names) - - viz_frame = self.segmenter.visualize_results( - frame, masks, bboxes, target_ids, probs, names - ) - - # Process depth if enabled - depth_viz = None - object_depths = [] - if self.enable_mono_depth: - # Get depth map - depth_map = self.depth_model.infer_depth(frame) - depth_map = np.array(depth_map) - - # Calculate average depth for each object - object_depths = [] - for mask in masks: - # Convert mask to numpy if needed - mask_np = mask.cpu().numpy() if hasattr(mask, "cpu") else mask - # Get depth values where mask is True - object_depth = depth_map[mask_np > 0.5] - # Calculate average depth (in meters) - avg_depth = np.mean(object_depth) if len(object_depth) > 0 else 0 - object_depths.append(avg_depth / 1000) - - # Create colorized depth visualization - depth_viz = self._create_depth_visualization(depth_map) - - # Overlay depth values on the visualization frame - for bbox, depth in zip(bboxes, object_depths): - x1, y1, x2, y2 = map(int, bbox) - # Draw depth text at bottom left of bounding box - depth_text = f"{depth:.2f}mm" - # Add black background for better visibility - text_size = cv2.getTextSize(depth_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0] - cv2.rectangle( - viz_frame, - (x1, y2 - text_size[1] - 5), - (x1 + text_size[0], y2), - (0, 0, 0), - -1, - ) - # Draw text in white - cv2.putText( - viz_frame, - depth_text, - (x1, y2 - 5), - cv2.FONT_HERSHEY_SIMPLEX, - 0.5, - (255, 255, 255), - 2, - ) - - # Create metadata in the new requested format - objects = [] - for i in range(len(bboxes)): - obj_data = { - "object_id": target_ids[i] if i < len(target_ids) else None, - "bbox": bboxes[i], - "prob": probs[i] if i < len(probs) else None, - "label": names[i] if i < len(names) else None, - } - - # Add depth if available - if self.enable_mono_depth and i < len(object_depths): - obj_data["depth"] = object_depths[i] - - objects.append(obj_data) - - # Create the new metadata dictionary - metadata = {"frame": frame, "viz_frame": viz_frame, "objects": objects} - - # Add depth visualization if available - if depth_viz is not None: - metadata["depth_viz"] = depth_viz - - # Convert masks to numpy arrays if they aren't already - numpy_masks = [mask.cpu().numpy() if hasattr(mask, "cpu") else mask for mask in masks] - - return SegmentationType(masks=numpy_masks, metadata=metadata) - - return video_stream.pipe(ops.map(process_frame)) - - def _create_depth_visualization(self, depth_map): - """ - Create a colorized visualization of the depth map. - - Args: - depth_map: Raw depth map in meters - - Returns: - Colorized depth map visualization - """ - # Normalize depth map to 0-255 range for visualization - depth_min = np.min(depth_map) - depth_max = np.max(depth_map) - depth_normalized = ((depth_map - depth_min) / (depth_max - depth_min) * 255).astype( - np.uint8 - ) - - # Apply colormap (using JET colormap for better depth perception) - depth_colored = cv2.applyColorMap(depth_normalized, cv2.COLORMAP_JET) - - # Add depth scale bar - scale_height = 30 - scale_width = depth_map.shape[1] # Match width with depth map - scale_bar = np.zeros((scale_height, scale_width, 3), dtype=np.uint8) - - # Create gradient for scale bar - for i in range(scale_width): - color = cv2.applyColorMap( - np.array([[i * 255 // scale_width]], dtype=np.uint8), cv2.COLORMAP_JET - ) - scale_bar[:, i] = color[0, 0] - - # Add depth values to scale bar - cv2.putText( - scale_bar, - f"{depth_min:.1f}mm", - (5, 20), - cv2.FONT_HERSHEY_SIMPLEX, - 0.5, - (255, 255, 255), - 1, - ) - cv2.putText( - scale_bar, - f"{depth_max:.1f}mm", - (scale_width - 60, 20), - cv2.FONT_HERSHEY_SIMPLEX, - 0.5, - (255, 255, 255), - 1, - ) - - # Combine depth map and scale bar - combined_viz = np.vstack((depth_colored, scale_bar)) - - return combined_viz - - def cleanup(self): - """Clean up resources.""" - self.segmenter.cleanup() - if self.enable_mono_depth: - del self.depth_model diff --git a/dimos/robot/agilex/piper_arm.py b/dimos/robot/agilex/piper_arm.py index 2c917b71fb..bb22db46b5 100644 --- a/dimos/robot/agilex/piper_arm.py +++ b/dimos/robot/agilex/piper_arm.py @@ -17,7 +17,7 @@ from typing import Optional, List from dimos import core -from dimos.hardware.zed_camera import ZEDModule +from dimos.hardware.camera.zed import ZEDModule from dimos.manipulation.visual_servoing.manipulation_module import ManipulationModule from dimos.msgs.sensor_msgs import Image from dimos.protocol import pubsub diff --git a/dimos/robot/unitree_webrtc/unitree_g1.py b/dimos/robot/unitree_webrtc/unitree_g1.py index 2963c2cfc6..18f9f95ae0 100644 --- a/dimos/robot/unitree_webrtc/unitree_g1.py +++ b/dimos/robot/unitree_webrtc/unitree_g1.py @@ -48,11 +48,11 @@ logger = setup_logger("dimos.robot.unitree_webrtc.unitree_g1", level=logging.INFO) -# try: -# from dimos.hardware.zed_camera import ZEDModule -# except ImportError: -# logger.warning("ZEDModule not found. Please install pyzed to use ZED camera functionality.") -# ZEDModule = None +try: + from dimos.hardware.camera.zed import ZEDModule +except ImportError: + logger.warning("ZEDModule not found. Please install pyzed to use ZED camera functionality.") + ZEDModule = None # Suppress verbose loggers logging.getLogger("aiortc.codecs.h264").setLevel(logging.ERROR) diff --git a/dimos/stream/stereo_camera_streams/zed.py b/dimos/stream/stereo_camera_streams/zed.py deleted file mode 100644 index ceb7eac58b..0000000000 --- a/dimos/stream/stereo_camera_streams/zed.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import time -from typing import Optional, Dict - -from reactivex import Observable, create -from reactivex import operators as ops - -from dimos.hardware.zed_camera import ZEDCamera - -try: - import pyzed.sl as sl -except ImportError: - sl = None - logging.warning("ZED SDK not found. Please install pyzed to use ZED camera functionality.") - -logger = logging.getLogger(__name__) - - -class ZEDCameraStream: - """ZED Camera stream that provides RGB and depth data as observables.""" - - def __init__( - self, - camera_id: int = 0, - resolution: Optional["sl.RESOLUTION"] = None, - depth_mode: Optional["sl.DEPTH_MODE"] = None, - fps: int = 30, - ): - """ - Initialize ZED camera stream. - - Args: - camera_id: Camera ID (0 for first ZED) - resolution: ZED camera resolution (defaults to HD720) - depth_mode: Depth computation mode (defaults to NEURAL) - fps: Camera frame rate (default: 30) - """ - if sl is None: - raise ImportError("ZED SDK not installed. Please install pyzed package.") - - self.camera_id = camera_id - self.fps = fps - - # Set default values if not provided - if resolution is None: - resolution = sl.RESOLUTION.HD720 - if depth_mode is None: - depth_mode = sl.DEPTH_MODE.NEURAL - - self.resolution = resolution - self.depth_mode = depth_mode - - # Initialize ZED camera - self.zed_camera = ZEDCamera( - camera_id=camera_id, resolution=resolution, depth_mode=depth_mode, fps=fps - ) - - self.is_opened = False - - def _initialize_camera(self) -> None: - """Initialize the ZED camera if not already initialized.""" - if not self.is_opened: - if not self.zed_camera.open(): - raise RuntimeError(f"Failed to open ZED camera {self.camera_id}") - self.is_opened = True - logger.info(f"ZED camera {self.camera_id} opened successfully") - - def create_stream(self) -> Observable: - """ - Create an observable stream of RGB and depth frames. - - Returns: - Observable: An observable emitting dictionaries with 'rgb' and 'depth' keys. - """ - - def emit_frames(observer, scheduler): - try: - # Initialize camera - if not self.is_opened: - self._initialize_camera() - - while True: - # Capture frame directly - left_img, right_img, depth_img = self.zed_camera.capture_frame() - - if left_img is not None and depth_img is not None: - frame_data = { - "rgb": left_img, - "depth": depth_img, - "right": right_img, - "timestamp": time.time(), - } - - observer.on_next(frame_data) - - except Exception as e: - logger.error(f"Error during ZED frame emission: {e}") - observer.on_error(e) - finally: - # Clean up resources - self._cleanup_camera() - observer.on_completed() - - return create(emit_frames).pipe( - ops.share(), # Share the stream among multiple subscribers - ) - - def get_camera_info(self) -> Dict[str, float]: - """ - Get ZED camera intrinsics (fx, fy, cx, cy). - - Returns: - Dictionary containing camera intrinsics: fx, fy, cx, cy - """ - if not self.is_opened: - self._initialize_camera() - - try: - camera_info = self.zed_camera.get_camera_info() - left_cam = camera_info.get("left_cam", {}) - - return { - "fx": left_cam.get("fx", 0.0), - "fy": left_cam.get("fy", 0.0), - "cx": left_cam.get("cx", 0.0), - "cy": left_cam.get("cy", 0.0), - } - except Exception as e: - logger.error(f"Error getting camera info: {e}") - return {} - - def _cleanup_camera(self) -> None: - """Clean up camera resources.""" - if self.is_opened: - self.zed_camera.close() - self.is_opened = False - logger.info("ZED camera resources cleaned up") - - def cleanup(self) -> None: - """Clean up all resources.""" - self._cleanup_camera() - - def __enter__(self): - """Context manager entry.""" - self._initialize_camera() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """Context manager exit.""" - self._cleanup_camera()