From 18c58e246156bb0c98192b7a0f6bfde312a7b222 Mon Sep 17 00:00:00 2001 From: lesh Date: Fri, 27 Jun 2025 18:08:38 -0700 Subject: [PATCH] removing data/* stuff --- dimos/data/__init__.py | 0 dimos/data/data_pipeline.py | 168 ------------------------ dimos/data/depth.py | 88 ------------- dimos/data/diffusion.py | 14 -- dimos/data/labels.py | 46 ------- dimos/data/pointcloud.py | 139 -------------------- dimos/data/segment.py | 92 ------------- dimos/data/videostream-data-pipeline.md | 30 ----- dimos/types/depth_map.py | 39 ------ dimos/types/pointcloud.py | 37 ------ 10 files changed, 653 deletions(-) delete mode 100644 dimos/data/__init__.py delete mode 100644 dimos/data/data_pipeline.py delete mode 100644 dimos/data/depth.py delete mode 100644 dimos/data/diffusion.py delete mode 100644 dimos/data/labels.py delete mode 100644 dimos/data/pointcloud.py delete mode 100644 dimos/data/segment.py delete mode 100644 dimos/data/videostream-data-pipeline.md delete mode 100644 dimos/types/depth_map.py delete mode 100644 dimos/types/pointcloud.py diff --git a/dimos/data/__init__.py b/dimos/data/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/dimos/data/data_pipeline.py b/dimos/data/data_pipeline.py deleted file mode 100644 index bebb881f48..0000000000 --- a/dimos/data/data_pipeline.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from dimos.stream.videostream import VideoStream - -import warnings -from collections import deque -from dimos.types.depth_map import DepthMapType -from dimos.types.label import LabelType -from dimos.types.pointcloud import PointCloudType -from dimos.types.segmentation import SegmentationType -import os - - -class DataPipeline: - def __init__( - self, - video_stream: VideoStream, - run_depth: bool = False, - run_labels: bool = False, - run_pointclouds: bool = False, - run_segmentations: bool = False, - max_workers: int = 4, - ): - self.video_stream = video_stream - self.run_depth = run_depth - self.run_labels = run_labels - self.run_pointclouds = run_pointclouds - self.run_segmentations = run_segmentations - self.max_workers = max_workers - - # Validate pipeline configuration - self._validate_pipeline() - - # Initialize the pipeline - self._initialize_pipeline() - - # Storage for processed data - self.generated_depth_maps = deque() - self.generated_labels = deque() - self.generated_pointclouds = deque() - self.generated_segmentations = deque() - - def _validate_pipeline(self): - """Validate the pipeline configuration based on dependencies.""" - if self.run_pointclouds and not self.run_depth: - raise ValueError( - "PointClouds generation requires Depth maps. Enable run_depth=True to use run_pointclouds=True." - ) - - if self.run_segmentations and not self.run_labels: - raise ValueError( - "Segmentations generation requires Labels. Enable run_labels=True to use run_segmentations=True." - ) - - if not any([self.run_depth, self.run_labels, self.run_pointclouds, self.run_segmentations]): - warnings.warn( - "No pipeline layers selected to run. The DataPipeline will be initialized without any processing." - ) - - def _initialize_pipeline(self): - """Initialize necessary components based on selected pipeline layers.""" - if self.run_depth: - from .depth import DepthProcessor - - self.depth_processor = DepthProcessor(debug=True) - print("Depth map generation enabled.") - else: - self.depth_processor = None - - if self.run_labels: - from .labels import LabelProcessor - - self.labels_processor = LabelProcessor(debug=True) - print("Label generation enabled.") - else: - self.labels_processor = None - - if self.run_pointclouds: - from .pointcloud import PointCloudProcessor - - self.pointcloud_processor = PointCloudProcessor(debug=True) - print("PointCloud generation enabled.") - else: - self.pointcloud_processor = None - - if self.run_segmentations: - from .segment import SegmentProcessor - - self.segmentation_processor = SegmentProcessor(debug=True) - print("Segmentation generation enabled.") - else: - self.segmentation_processor = None - - def run(self): - """Execute the selected pipeline layers.""" - try: - for frame in self.video_stream: - result = self._process_frame(frame) - depth_map, label, pointcloud, segmentation = result - - if depth_map is not None: - self.generated_depth_maps.append(depth_map) - if label is not None: - self.generated_labels.append(label) - if pointcloud is not None: - self.generated_pointclouds.append(pointcloud) - if segmentation is not None: - self.generated_segmentations.append(segmentation) - except KeyboardInterrupt: - print("Pipeline interrupted by user.") - - def _process_frame(self, frame): - """Process a single frame and return results.""" - depth_map = None - label = None - pointcloud = None - segmentation = None - - if self.run_depth: - depth_map = self.depth_processor.process(frame) - - if self.run_labels: - label = self.labels_processor.caption_image_data(frame) - - if ( - self.run_pointclouds - and isinstance(depth_map, DepthMapType) - and self.pointcloud_processor - ): - pointcloud = self.pointcloud_processor.process_frame(frame, depth_map.depth_data) - - if self.run_segmentations and isinstance(label, LabelType) and self.segmentation_processor: - segmentation = self.segmentation_processor.process_frame(frame, label.labels) - - return depth_map, label, pointcloud, segmentation - - def save_all_processed_data(self, directory: str): - """Save all processed data to files in the specified directory.""" - os.makedirs(directory, exist_ok=True) - - for i, depth_map in enumerate(self.generated_depth_maps): - if isinstance(depth_map, DepthMapType): - depth_map.save_to_file(os.path.join(directory, f"depth_map_{i}.npy")) - - for i, label in enumerate(self.generated_labels): - if isinstance(label, LabelType): - label.save_to_json(os.path.join(directory, f"labels_{i}.json")) - - for i, pointcloud in enumerate(self.generated_pointclouds): - if isinstance(pointcloud, PointCloudType): - pointcloud.save_to_file(os.path.join(directory, f"pointcloud_{i}.pcd")) - - for i, segmentation in enumerate(self.generated_segmentations): - if isinstance(segmentation, SegmentationType): - segmentation.save_masks(os.path.join(directory, f"segmentation_{i}")) diff --git a/dimos/data/depth.py b/dimos/data/depth.py deleted file mode 100644 index 893fd1d000..0000000000 --- a/dimos/data/depth.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dimos.models.depth.metric3d import Metric3D -from PIL import Image -import torch -import cv2 -import logging -import numpy as np -from dimos.types.depth_map import DepthMapType - - -class DepthProcessor: - def __init__(self, debug=False): - self.debug = debug - self.metric_3d = Metric3D() - self.depth_count = 0 - self.valid_depth_count = 0 - self.logger = logging.getLogger(__name__) - self.intrinsic = [707.0493, 707.0493, 604.0814, 180.5066] # Default intrinsic - - print("DepthProcessor initialized") - - if debug: - print("Running in debug mode") - self.logger.info("Running in debug mode") - - def process(self, frame: Image.Image, intrinsics=None): - """Process a frame to generate a depth map. - - Args: - frame: PIL Image to process - intrinsics: Optional camera intrinsics parameters - - Returns: - DepthMapType containing the depth map - """ - if intrinsics: - self.metric_3d.update_intrinsic(intrinsics) - else: - self.metric_3d.update_intrinsic(self.intrinsic) - - # Convert frame to numpy array suitable for processing - if isinstance(frame, Image.Image): - image = frame.convert("RGB") - elif isinstance(frame, np.ndarray): - image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - else: - raise ValueError("Unsupported frame format. Must be PIL Image or numpy array.") - - image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR) - image_np = resize_image_for_vit(image_np) - - # Process image and run depth via Metric3D - try: - with torch.no_grad(): - depth_map = self.metric_3d.infer_depth(image_np) - - self.depth_count += 1 - - # Validate depth map - if is_depth_map_valid(np.array(depth_map)): - self.valid_depth_count += 1 - else: - self.logger.error("Invalid depth map for the provided frame.") - print("Invalid depth map for the provided frame.") - return None - - if self.debug: - # Save depth map locally or to S3 as needed - pass # Implement saving logic if required - - return DepthMapType(depth_data=depth_map, metadata={"intrinsics": intrinsics}) - - except Exception as e: - self.logger.error(f"Error processing frame: {e}") - return None diff --git a/dimos/data/diffusion.py b/dimos/data/diffusion.py deleted file mode 100644 index e99e9c2ef4..0000000000 --- a/dimos/data/diffusion.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - diff --git a/dimos/data/labels.py b/dimos/data/labels.py deleted file mode 100644 index 662af56f34..0000000000 --- a/dimos/data/labels.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -from dimos.types.label import LabelType - - -class LabelProcessor: - def __init__(self, debug: bool = False): - self.model = None - self.prompt = 'Create a JSON representation where each entry consists of a key "object" with a numerical suffix starting from 1, and a corresponding "description" key with a value that is a concise, up to six-word sentence describing each main, distinct object or person in the image. Each pair should uniquely describe one element without repeating keys. An example: {"object1": { "description": "Man in red hat walking." },"object2": { "description": "Wooden pallet with boxes." },"object3": { "description": "Cardboard boxes stacked." },"object4": { "description": "Man in green vest standing." }}' - self.debug = debug - - def _initialize_model(self): - if self.model is None: - from dimos.models.labels.llava_34b import Llava - - self.model = Llava( - mmproj=f"{os.getcwd()}/models/mmproj-model-f16.gguf", - model_path=f"{os.getcwd()}/models/llava-v1.6-34b.Q4_K_M.gguf", - gpu=True, - ) - if self.debug: - print("Llava model initialized.") - - def caption_image_data(self, frame): - self._initialize_model() - try: - output = self.model.run_inference(frame, self.prompt, return_json=True) - if self.debug: - print("Output:", output) - return LabelType(labels=output, metadata={"frame_id": frame.id}) - except Exception as e: - print(f"Error in captioning image: {e}") - return LabelType(labels={}, metadata={"error": str(e)}) diff --git a/dimos/data/pointcloud.py b/dimos/data/pointcloud.py deleted file mode 100644 index 8d95635d2e..0000000000 --- a/dimos/data/pointcloud.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import cv2 -import numpy as np -import open3d as o3d -from PIL import Image -import logging - -from dimos.models.segmentation.segment_utils import apply_mask_to_image -from dimos.models.pointcloud.pointcloud_utils import ( - create_point_cloud_from_rgbd, - canonicalize_point_cloud, -) -from dimos.types.pointcloud import PointCloudType - -# Setup logging -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -class PointCloudProcessor: - def __init__(self, output_dir, intrinsic_parameters=None): - """ - Initializes the PointCloudProcessor. - - Args: - output_dir (str): The directory where point clouds will be saved. - intrinsic_parameters (dict, optional): Camera intrinsic parameters. - Defaults to None, in which case default parameters are used. - """ - self.output_dir = output_dir - os.makedirs(self.output_dir, exist_ok=True) - self.logger = logger - - # Default intrinsic parameters - self.default_intrinsic_parameters = { - "width": 640, - "height": 480, - "fx": 960.0, - "fy": 960.0, - "cx": 320.0, - "cy": 240.0, - } - self.intrinsic_parameters = ( - intrinsic_parameters if intrinsic_parameters else self.default_intrinsic_parameters - ) - - def process_frame(self, image, depth_map, masks): - """ - Process a single frame to generate point clouds. - - Args: - image (PIL.Image.Image or np.ndarray): The RGB image. - depth_map (PIL.Image.Image or np.ndarray): The depth map corresponding to the image. - masks (list of np.ndarray): A list of binary masks for segmentation. - - Returns: - list of PointCloudType: A list of point clouds for each mask. - bool: A flag indicating if the point clouds were canonicalized. - """ - try: - self.logger.info( - "STARTING POINT CLOUD PROCESSING ---------------------------------------" - ) - - # Convert images to OpenCV format if they are PIL Images - if isinstance(image, Image.Image): - original_image_cv = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR) - else: - original_image_cv = image - - if isinstance(depth_map, Image.Image): - depth_image_cv = cv2.cvtColor(np.array(depth_map.convert("RGB")), cv2.COLOR_RGB2BGR) - else: - depth_image_cv = depth_map - - width, height = original_image_cv.shape[1], original_image_cv.shape[0] - intrinsic_parameters = self.intrinsic_parameters.copy() - intrinsic_parameters.update( - { - "width": width, - "height": height, - "cx": width / 2, - "cy": height / 2, - } - ) - - point_clouds = [] - point_cloud_data = [] - - # Create original point cloud - original_pcd = create_point_cloud_from_rgbd( - original_image_cv, depth_image_cv, intrinsic_parameters - ) - pcd, canonicalized, transformation = canonicalize_point_cloud( - original_pcd, canonicalize_threshold=0.3 - ) - - for idx, mask in enumerate(masks): - mask_binary = mask > 0 - - masked_rgb = apply_mask_to_image(original_image_cv, mask_binary) - masked_depth = apply_mask_to_image(depth_image_cv, mask_binary) - - pcd = create_point_cloud_from_rgbd(masked_rgb, masked_depth, intrinsic_parameters) - # Remove outliers - cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0) - inlier_cloud = pcd.select_by_index(ind) - if canonicalized: - inlier_cloud.transform(transformation) - - point_clouds.append( - PointCloudType(point_cloud=inlier_cloud, metadata={"mask_index": idx}) - ) - # Save point cloud to file - pointcloud_filename = f"pointcloud_{idx}.pcd" - pointcloud_filepath = os.path.join(self.output_dir, pointcloud_filename) - o3d.io.write_point_cloud(pointcloud_filepath, inlier_cloud) - point_cloud_data.append(pointcloud_filepath) - self.logger.info(f"Saved point cloud {pointcloud_filepath}") - - self.logger.info("DONE POINT CLOUD PROCESSING ---------------------------------------") - return point_clouds, canonicalized - except Exception as e: - self.logger.error(f"Error processing frame: {e}") - return [], False diff --git a/dimos/data/segment.py b/dimos/data/segment.py deleted file mode 100644 index 5279235b4e..0000000000 --- a/dimos/data/segment.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import cv2 -import numpy as np -from PIL import Image -import logging -from dimos.models.segmentation.segment_utils import sample_points_from_heatmap -from dimos.models.segmentation.sam import SAM -from dimos.models.segmentation.clipseg import CLIPSeg - -# Setup logging -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -class SegmentProcessor: - def __init__(self, device="cuda"): - # Initialize CLIPSeg and SAM models - self.clipseg = CLIPSeg(model_name="CIDAS/clipseg-rd64-refined", device=device) - self.sam = SAM(model_name="facebook/sam-vit-huge", device=device) - self.logger = logger - - def process_frame(self, image, captions): - """ - Process a single image and return segmentation masks. - - Args: - image (PIL.Image.Image or np.ndarray): The input image to process. - captions (list of str): A list of captions for segmentation. - - Returns: - list of np.ndarray: A list of segmentation masks corresponding to the captions. - """ - try: - self.logger.info("STARTING PROCESSING IMAGE ---------------------------------------") - self.logger.info(f"Processing image with captions: {captions}") - - # Convert image to PIL.Image if it's a numpy array - if isinstance(image, np.ndarray): - image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) - - preds = self.clipseg.run_inference(image, captions) - sampled_points = [] - sam_masks = [] - - original_size = image.size # (width, height) - - for idx in range(preds.shape[0]): - points = sample_points_from_heatmap(preds[idx][0], original_size, num_points=10) - if points: - sampled_points.append(points) - else: - self.logger.info(f"No points sampled for prediction index {idx}") - sampled_points.append([]) - - for idx in range(preds.shape[0]): - if sampled_points[idx]: - mask_tensor = self.sam.run_inference_from_points(image, [sampled_points[idx]]) - if mask_tensor: - # Convert mask tensor to a numpy array - mask = (255 * mask_tensor[0].numpy().squeeze()).astype(np.uint8) - sam_masks.append(mask) - else: - self.logger.info( - f"No mask tensor returned for sampled points at index {idx}" - ) - sam_masks.append( - np.zeros((original_size[1], original_size[0]), dtype=np.uint8) - ) - else: - self.logger.info( - f"No sampled points for prediction index {idx}, skipping mask inference" - ) - sam_masks.append(np.zeros((original_size[1], original_size[0]), dtype=np.uint8)) - - self.logger.info("DONE PROCESSING IMAGE ---------------------------------------") - return sam_masks - except Exception as e: - self.logger.error(f"Error processing image: {e}") - return [] diff --git a/dimos/data/videostream-data-pipeline.md b/dimos/data/videostream-data-pipeline.md deleted file mode 100644 index dd1fd96b66..0000000000 --- a/dimos/data/videostream-data-pipeline.md +++ /dev/null @@ -1,30 +0,0 @@ -# UNDER DEVELOPMENT 🚧🚧🚧 -# Example data pipeline from video stream implementation - -```bash - from dimos.stream.videostream import VideoStream - from dimos.data.data_pipeline import DataPipeline - - # init video stream from the camera source - video_stream = VideoStream(source=0) - - # init data pipeline with desired processors enabled, max workers is 4 by default - # depth only implementation - pipeline = DataPipeline( - video_stream=video_stream, - run_depth=True, - run_labels=False, - run_pointclouds=False, - run_segmentations=False - ) - - try: - # Run pipeline - pipeline.run() - except KeyboardInterrupt: - # Handle interrupt - print("Pipeline interrupted by user.") - finally: - # Release the video capture - video_stream.release() -``` diff --git a/dimos/types/depth_map.py b/dimos/types/depth_map.py deleted file mode 100644 index a076f6dd6b..0000000000 --- a/dimos/types/depth_map.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Any -import numpy as np - - -class DepthMapType: - def __init__(self, depth_data: np.ndarray, metadata: Any = None): - """ - Initializes a standardized depth map type. - - Args: - depth_data (np.ndarray): The depth map data as a numpy array. - metadata (Any, optional): Additional metadata related to the depth map. - """ - self.depth_data = depth_data - self.metadata = metadata - - def normalize(self): - """Normalize the depth data to a 0-1 range.""" - min_val = np.min(self.depth_data) - max_val = np.max(self.depth_data) - self.depth_data = (self.depth_data - min_val) / (max_val - min_val) - - def save_to_file(self, filepath: str): - """Save the depth map to a file.""" - np.save(filepath, self.depth_data) diff --git a/dimos/types/pointcloud.py b/dimos/types/pointcloud.py deleted file mode 100644 index 55c1aebc4a..0000000000 --- a/dimos/types/pointcloud.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2025 Dimensional Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import open3d as o3d -from typing import Any - - -class PointCloudType: - def __init__(self, point_cloud: o3d.geometry.PointCloud, metadata: Any = None): - """ - Initializes a standardized point cloud type. - - Args: - point_cloud (o3d.geometry.PointCloud): The point cloud data. - metadata (Any, optional): Additional metadata related to the point cloud. - """ - self.point_cloud = point_cloud - self.metadata = metadata - - def downsample(self, voxel_size: float): - """Downsample the point cloud using a voxel grid filter.""" - self.point_cloud = self.point_cloud.voxel_down_sample(voxel_size) - - def save_to_file(self, filepath: str): - """Save the point cloud to a file.""" - o3d.io.write_point_cloud(filepath, self.point_cloud)