diff --git a/assets/foxglove_image_sharpness_test.json b/assets/foxglove_image_sharpness_test.json
new file mode 100644
index 0000000000..e68b79a7e4
--- /dev/null
+++ b/assets/foxglove_image_sharpness_test.json
@@ -0,0 +1,140 @@
+{
+  "configById": {
+    "Image!1dpphsz": {
+      "cameraState": {
+        "distance": 20,
+        "perspective": true,
+        "phi": 60,
+        "target": [
+          0,
+          0,
+          0
+        ],
+        "targetOffset": [
+          0,
+          0,
+          0
+        ],
+        "targetOrientation": [
+          0,
+          0,
+          0,
+          1
+        ],
+        "thetaOffset": 45,
+        "fovy": 45,
+        "near": 0.5,
+        "far": 5000
+      },
+      "followMode": "follow-pose",
+      "scene": {},
+      "transforms": {},
+      "topics": {},
+      "layers": {},
+      "publish": {
+        "type": "point",
+        "poseTopic": "/move_base_simple/goal",
+        "pointTopic": "/clicked_point",
+        "poseEstimateTopic": "/initialpose",
+        "poseEstimateXDeviation": 0.5,
+        "poseEstimateYDeviation": 0.5,
+        "poseEstimateThetaDeviation": 0.26179939
+      },
+      "imageMode": {
+        "imageTopic": "/all"
+      }
+    },
+    "Image!2xvd0hl": {
+      "cameraState": {
+        "distance": 20,
+        "perspective": true,
+        "phi": 60,
+        "target": [
+          0,
+          0,
+          0
+        ],
+        "targetOffset": [
+          0,
+          0,
+          0
+        ],
+        "targetOrientation": [
+          0,
+          0,
+          0,
+          1
+        ],
+        "thetaOffset": 45,
+        "fovy": 45,
+        "near": 0.5,
+        "far": 5000
+      },
+      "followMode": "follow-pose",
+      "scene": {},
+      "transforms": {},
+      "topics": {},
+      "layers": {},
+      "publish": {
+        "type": "point",
+        "poseTopic": "/move_base_simple/goal",
+        "pointTopic": "/clicked_point",
+        "poseEstimateTopic": "/initialpose",
+        "poseEstimateXDeviation": 0.5,
+        "poseEstimateYDeviation": 0.5,
+        "poseEstimateThetaDeviation": 0.26179939
+      },
+      "imageMode": {
+        "imageTopic": "/sharp"
+      }
+    },
+    "Gauge!1iofczz": {
+      "path": "/sharpness.x",
+      "minValue": 0,
+      "maxValue": 1,
+      "colorMap": "red-yellow-green",
+      "colorMode": "colormap",
+      "gradient": [
+        "#0000ff",
+        "#ff00ff"
+      ],
+      "reverse": false
+    },
+    "Plot!1gy7vh9": {
+      "paths": [
+        {
+          "timestampMethod": "receiveTime",
+          "value": "/sharpness.x",
+          "enabled": true,
+          "color": "#4e98e2"
+        }
+      ],
+      "showXAxisLabels": true,
+      "showYAxisLabels": true,
+      "showLegend": true,
+      "legendDisplay": "floating",
+      "showPlotValuesInLegend": false,
+      "isSynced": true,
+      "xAxisVal": "timestamp",
+      "sidebarDimension": 240
+    }
+  },
+  "globalVariables": {},
+  "userNodes": {},
+  "playbackConfig": {
+    "speed": 1
+  },
+  "layout": {
+    "first": {
+      "first": "Image!1dpphsz",
+      "second": "Image!2xvd0hl",
+      "direction": "row"
+    },
+    "second": {
+      "first": "Gauge!1iofczz",
+      "second": "Plot!1gy7vh9",
+      "direction": "row"
+    },
+    "direction": "column"
+  }
+}
diff --git a/dimos/msgs/sensor_msgs/Image.py b/dimos/msgs/sensor_msgs/Image.py
index fb57cfcd3e..6845947603 100644
--- a/dimos/msgs/sensor_msgs/Image.py
+++ b/dimos/msgs/sensor_msgs/Image.py
@@ -14,17 +14,22 @@
 
 import time
 from dataclasses import dataclass, field
+from datetime import timedelta
 from enum import Enum
 from typing import Optional, Tuple
 
 import cv2
 import numpy as np
+import reactivex as rx
 
 # Import LCM types
 from dimos_lcm.sensor_msgs.Image import Image as LCMImage
 from dimos_lcm.std_msgs.Header import Header
+from reactivex import operators as ops
+from reactivex.observable import Observable
+from reactivex.scheduler import ThreadPoolScheduler
 
-from dimos.types.timestamped import Timestamped
+from dimos.types.timestamped import Timestamped, TimestampedBufferCollection
 
 
 class ImageFormat(Enum):
@@ -279,6 +284,40 @@ def crop(self, x: int, y: int, width: int, height: int) -> "Image":
             ts=self.ts,
         )
 
+    def sharpness(self) -> float:
+        """
+        Compute the Tenengrad focus measure for an image.
+        Returns a normalized value between 0 and 1, where 1 is sharpest.
+
+        Uses adaptive normalization based on image statistics for better
+        discrimination across different image types.
+        """
+        grayscale = self.to_grayscale()
+        # Sobel gradient computation in x and y directions
+        sx = cv2.Sobel(grayscale.data, cv2.CV_32F, 1, 0, ksize=5)
+        sy = cv2.Sobel(grayscale.data, cv2.CV_32F, 0, 1, ksize=5)
+
+        # Compute gradient magnitude
+        magnitude = cv2.magnitude(sx, sy)
+
+        mean_mag = magnitude.mean()
+
+        # Use log-scale normalization for better discrimination
+        # This maps typical values more evenly across the 0-1 range:
+        # - Blurry images (mean ~50-150): 0.15-0.35
+        # - Medium sharp (mean ~150-500): 0.35-0.65
+        # - Sharp images (mean ~500-2000): 0.65-0.85
+        # - Very sharp (mean >2000): 0.85-1.0
+
+        if mean_mag <= 0:
+            return 0.0
+
+        # Log scale with offset to handle the full range
+        # log10(50) ≈ 1.7, log10(5000) ≈ 3.7
+        normalized = (np.log10(mean_mag + 1) - 1.7) / 2.0
+
+        return float(np.clip(normalized, 0.0, 1.0))  # float() honors the -> float annotation
+
     def save(self, filepath: str) -> bool:
         """Save image to file."""
         # Convert to OpenCV format for saving
@@ -456,3 +495,19 @@ def agent_encode(self) -> str:
         base64_str = base64.b64encode(jpeg_bytes).decode("utf-8")
 
         return base64_str
+
+
+def sharpness_window(target_frequency: float, source: Observable[Image]) -> Observable[Image]:
+    window = TimestampedBufferCollection(1.0 / target_frequency)
+    source.subscribe(window.add)  # NOTE(review): subscription is never disposed; leaks unless source completes
+
+    thread_scheduler = ThreadPoolScheduler(max_workers=1)
+
+    def find_best(*_):
+        if not window._items:  # NOTE(review): reaches into private buffer state — prefer a public accessor
+            return None
+        return max(window._items, key=lambda x: x.sharpness())
+
+    return rx.interval(1.0 / target_frequency).pipe(
+        ops.observe_on(thread_scheduler), ops.map(find_best), ops.filter(lambda x: x is not None)
+    )
diff --git a/dimos/msgs/sensor_msgs/test_image.py b/dimos/msgs/sensor_msgs/test_image.py
index 8e4e0a413f..7a3c84a8b6 100644
--- a/dimos/msgs/sensor_msgs/test_image.py
+++ b/dimos/msgs/sensor_msgs/test_image.py
@@ -15,8 +15,9 @@
 import numpy as np
 import pytest
 
-from dimos.msgs.sensor_msgs.Image import Image, ImageFormat
+from dimos.msgs.sensor_msgs.Image import Image, ImageFormat, sharpness_window
 from dimos.utils.data import get_data
+from dimos.utils.testing import TimedSensorReplay
 
 
 @pytest.fixture
@@ -61,3 +62,67 @@ def test_opencv_conversion(img: Image):
     # artificially patch timestamp
     decoded_img.ts = img.ts
     assert decoded_img == img
+
+
+@pytest.mark.tool
+def test_sharpness_detector():
+    get_data("unitree_office_walk")  # Preload data for testing
+    video_store = TimedSensorReplay(
+        "unitree_office_walk/video", autocast=lambda x: Image.from_numpy(x).to_rgb()
+    )
+
+    cnt = 0
+    for image in video_store.iterate():
+        cnt = cnt + 1
+        print(image.sharpness())
+        if cnt > 30:
+            return
+
+
+@pytest.mark.tool
+def test_sharpness_sliding_window_foxglove():
+    import time
+
+    from dimos.msgs.geometry_msgs import Vector3
+    from dimos.protocol.pubsub.lcmpubsub import LCM, Topic
+
+    lcm = LCM()
+    lcm.start()
+
+    ping = 0
+    sharp_topic = Topic("/sharp", Image)
+    all_topic = Topic("/all", Image)
+    sharpness_topic = Topic("/sharpness", Vector3)
+
+    get_data("unitree_office_walk")  # Preload data for testing
+    video_stream = TimedSensorReplay(
+        "unitree_office_walk/video", autocast=lambda x: Image.from_numpy(x).to_rgb()
+    ).stream()
+
+    # Publish all images to all_topic
+    video_stream.subscribe(lambda x: lcm.publish(all_topic, x))
+
+    def sharpness_vector(x: Image):
+        nonlocal ping
+        sharpness = x.sharpness()
+        if ping:
+            y = 1
+            ping = ping - 1
+        else:
+            y = 0
+
+        return Vector3([sharpness, y, 0])
+
+    video_stream.subscribe(lambda x: lcm.publish(sharpness_topic, sharpness_vector(x)))
+
+    def pub_sharp(x: Image):
+        nonlocal ping
+        ping = 3
+        lcm.publish(sharp_topic, x)
+
+    sharpness_window(
+        1,
+        source=video_stream,
+    ).subscribe(pub_sharp)
+
+    time.sleep(120)