From ddce549809297ce013517b17136d30121eeb5d13 Mon Sep 17 00:00:00 2001 From: Matt Dawkins Date: Tue, 13 Jan 2026 22:27:28 -0500 Subject: [PATCH 01/30] Check in WIP --- client/bundle.css | 739 ++++++++++++++++++ client/dive-common/apispec.ts | 114 +++ .../recipes/segmentationpointclick.ts | 690 ++++++++++++++++ client/dive-common/use/useModeManager.ts | 47 +- client/platform/desktop/backend/ipcService.ts | 100 +++ .../desktop/backend/native/segmentation.ts | 529 +++++++++++++ client/platform/desktop/frontend/api.ts | 53 ++ .../frontend/components/ViewerLoader.vue | 259 +++++- client/platform/web-girder/api/rpc.service.ts | 46 +- .../web-girder/views/ViewerLoader.vue | 33 +- client/src/components/LayerManager.vue | 4 + client/src/layers/EditAnnotationLayer.ts | 148 +++- client/src/provides.ts | 12 + client/src/recipe.ts | 2 + server/dive_server/views_rpc.py | 157 ++++ 15 files changed, 2924 insertions(+), 9 deletions(-) create mode 100644 client/bundle.css create mode 100644 client/dive-common/recipes/segmentationpointclick.ts create mode 100644 client/platform/desktop/backend/native/segmentation.ts diff --git a/client/bundle.css b/client/bundle.css new file mode 100644 index 000000000..a1cfe4fc8 --- /dev/null +++ b/client/bundle.css @@ -0,0 +1,739 @@ +.controls { + position: absolute; +} + +.video-annotator { + position: relative; + left: 0; + right: 0; + top: 0; + bottom: 0; + z-index: 0; + display: flex; + flex-direction: column; +} +.video-annotator .geojs-map { + margin: 2px; +} +.video-annotator .geojs-map.geojs-map:focus { + outline: none; +} +.video-annotator .playback-container { + flex: 1; +} +.video-annotator .loadingSpinnerContainer { + z-index: 20; + margin: 0; + position: absolute; + top: 50%; + left: 50%; + -ms-transform: translate(-50%, -50%); + transform: translate(-50%, -50%); +} +.video-annotator .geojs-map.annotation-input { + cursor: inherit; +} + +.selected-camera { + box-sizing: content-box; +} +.selected-camera .geojs-map { + outline: 3px cyan 
dashed; +} +.selected-camera .geojs-map.geojs-map:focus { + outline: 3px cyan dashed; +} + +.imageCursor { + z-index: 10; + position: fixed; + backface-visibility: hidden; + top: 0; + left: 0; + pointer-events: none; +} + +.controls { + bottom: 0; +} + +.controls { + position: absolute; +} + +.video-annotator { + position: relative; + left: 0; + right: 0; + top: 0; + bottom: 0; + z-index: 0; + display: flex; + flex-direction: column; +} +.video-annotator .geojs-map { + margin: 2px; +} +.video-annotator .geojs-map.geojs-map:focus { + outline: none; +} +.video-annotator .playback-container { + flex: 1; +} +.video-annotator .loadingSpinnerContainer { + z-index: 20; + margin: 0; + position: absolute; + top: 50%; + left: 50%; + -ms-transform: translate(-50%, -50%); + transform: translate(-50%, -50%); +} +.video-annotator .geojs-map.annotation-input { + cursor: inherit; +} + +.selected-camera { + box-sizing: content-box; +} +.selected-camera .geojs-map { + outline: 3px cyan dashed; +} +.selected-camera .geojs-map.geojs-map:focus { + outline: 3px cyan dashed; +} + +.imageCursor { + z-index: 10; + position: fixed; + backface-visibility: hidden; + top: 0; + left: 0; + pointer-events: none; +} + +.controls { + bottom: 0; +} + +.controls { + position: absolute; +} + +.video-annotator { + position: relative; + left: 0; + right: 0; + top: 0; + bottom: 0; + z-index: 0; + display: flex; + flex-direction: column; +} +.video-annotator .geojs-map { + margin: 2px; +} +.video-annotator .geojs-map.geojs-map:focus { + outline: none; +} +.video-annotator .playback-container { + flex: 1; +} +.video-annotator .loadingSpinnerContainer { + z-index: 20; + margin: 0; + position: absolute; + top: 50%; + left: 50%; + -ms-transform: translate(-50%, -50%); + transform: translate(-50%, -50%); +} +.video-annotator .geojs-map.annotation-input { + cursor: inherit; +} + +.selected-camera { + box-sizing: content-box; +} +.selected-camera .geojs-map { + outline: 3px cyan dashed; +} +.selected-camera 
.geojs-map.geojs-map:focus { + outline: 3px cyan dashed; +} + +.imageCursor { + z-index: 10; + position: fixed; + backface-visibility: hidden; + top: 0; + left: 0; + pointer-events: none; +} + +.controls { + bottom: 0; +} + +.border-radius[data-v-77dee125] { + border: 1px solid #888888; + padding: 2px 5px; + border-radius: 5px; +} + +.event-chart { + position: relative; + height: calc(100% - 10px); + margin: 5px 0; + overflow-y: auto; + overflow-x: hidden; +} +.event-chart .tooltip { + position: absolute; + background: black; + border: 1px solid white; + padding: 0px 5px; + font-size: 14px; + z-index: 2; +} + +.input-box { + border: 1px solid rgba(255, 255, 255, 0.15); + border-radius: 4px; + padding: 0 6px; + color: white; +} + +.trackNumber { + font-family: monospace; + max-width: 80px; + overflow: hidden; + white-space: nowrap; + text-overflow: ellipsis; +} +.trackNumber:hover { + cursor: pointer; + font-weight: bolder; + text-decoration: underline; +} + +.border-highlight[data-v-0d46f934] { + border-bottom: 1px solid gray; +} + +.type-checkbox[data-v-0d46f934] { + max-width: 80%; + overflow-wrap: anywhere; +} + +.hover-show-parent[data-v-0d46f934] .hover-show-child[data-v-0d46f934] { + display: none; +} +.hover-show-parent[data-v-0d46f934][data-v-0d46f934]:hover .hover-show-child[data-v-0d46f934] { + display: inherit; +} + +.outlined[data-v-0d46f934] { + background-color: gray; + color: #222; + font-weight: 600; + border-radius: 6px; + padding: 0 5px; + font-size: 12px; +} + +.groups[data-v-c26ed586] { + overflow-y: auto; + overflow-x: hidden; +} + +.input-box { + border: 1px solid rgba(255, 255, 255, 0.15); + border-radius: 4px; + padding: 0 6px; + color: white; +} + +.trackNumber { + font-family: monospace; + max-width: 80px; + overflow: hidden; + white-space: nowrap; + text-overflow: ellipsis; +} +.trackNumber:hover { + cursor: pointer; + font-weight: bolder; + text-decoration: underline; +} + +.freeform-input[data-v-d679c59c] { + width: 150px; +} + 
+.timeline .tick { + shape-rendering: crispEdges; + font-size: 12px; + stroke-opacity: 0.5; + stroke-dasharray: 2, 2; +} + +.timeline[data-v-0d0fe2ba] { + min-height: 175px; + position: relative; + display: flex; + flex-direction: column; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] { + flex: 1; + position: relative; + overflow: hidden; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .hand[data-v-0d0fe2ba] { + position: absolute; + top: 0; + width: 0; + height: 100%; + border-left: 1px solid #299be3; + z-index: 10; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .time-filter-line[data-v-0d0fe2ba] { + position: absolute; + top: 0; + width: 0; + height: 100%; + z-index: 2; + cursor: col-resize; + pointer-events: auto; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .time-filter-tooltip[data-v-0d0fe2ba] { + position: absolute; + top: 30px; + transform: translateX(-50%); + background-color: rgba(0, 0, 0, 0.8); + color: white; + padding: 4px 8px; + border-radius: 4px; + font-size: 12px; + white-space: nowrap; + pointer-events: none; + z-index: 20; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .time-filter-start-line[data-v-0d0fe2ba] { + border-left: 3px solid #4caf50; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .time-filter-end-line[data-v-0d0fe2ba] { + border-left: 3px solid #f44336; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .time-filter-dimming[data-v-0d0fe2ba] { + position: absolute; + top: 0; + height: 100%; + background-color: rgba(0, 0, 0, 0.3); + pointer-events: none; + z-index: 1; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .child[data-v-0d0fe2ba] { + position: absolute; + top: 0; + bottom: 17px; + left: 0; + right: 0; + z-index: 0; +} +.timeline[data-v-0d0fe2ba] .minimap[data-v-0d0fe2ba] { + height: 10px; +} +.timeline[data-v-0d0fe2ba] .minimap[data-v-0d0fe2ba] .fill[data-v-0d0fe2ba] { + position: relative; + height: 100%; + background-color: #80c6e8; +} + 
+.line-chart { + height: 100%; +} +.line-chart .line { + fill: none; + stroke-width: 1.5px; +} +.line-chart .axis-y { + font-size: 12px; +} +.line-chart .axis-y g:first-of-type, +.line-chart .axis-y g:last-of-type { + display: none; +} +.line-chart .tooltip { + position: absolute; + background: black; + border: 1px solid white; + padding: 0px 5px; + font-size: 14px; +} + +.input-box { + border: 1px solid rgba(255, 255, 255, 0.15); + border-radius: 4px; + padding: 0 6px; + color: white; +} + +.trackNumber { + font-family: monospace; + max-width: 80px; + overflow: hidden; + white-space: nowrap; + text-overflow: ellipsis; +} +.trackNumber:hover { + cursor: pointer; + font-weight: bolder; + text-decoration: underline; +} + +.track-item[data-v-7a688bfe] { + border-radius: inherit; +} +.track-item[data-v-7a688bfe] .item-row[data-v-7a688bfe] { + width: 100%; +} +.track-item[data-v-7a688bfe] .type-color-box[data-v-7a688bfe] { + margin: 7px; + margin-top: 4px; + min-width: 15px; + max-width: 15px; + min-height: 15px; + max-height: 15px; +} + +.track-item-compact[data-v-7a688bfe] { + border-radius: inherit; + height: 50px; + border-bottom: 1px solid #333; + cursor: pointer; +} +.track-item-compact[data-v-7a688bfe][data-v-7a688bfe]:hover { + background-color: #2a2a2a; +} +.track-item-compact[data-v-7a688bfe] .type-color-box-compact[data-v-7a688bfe] { + min-width: 10px; + max-width: 10px; + min-height: 10px; + max-height: 10px; + margin-right: 6px; + border-radius: 2px; +} +.track-item-compact[data-v-7a688bfe] .trackNumber-compact[data-v-7a688bfe] { + font-size: 14px; + font-weight: bold; + margin-right: 8px; + min-width: 30px; +} +.track-item-compact[data-v-7a688bfe] .track-frame-start[data-v-7a688bfe], +.track-item-compact[data-v-7a688bfe] .track-frame-end[data-v-7a688bfe] { + font-size: 14px; + color: #888; + min-width: 45px; + flex-shrink: 0; + text-align: right; +} +.track-item-compact[data-v-7a688bfe] .track-frame-start[data-v-7a688bfe].clickable[data-v-7a688bfe], 
+.track-item-compact[data-v-7a688bfe] .track-frame-end[data-v-7a688bfe].clickable[data-v-7a688bfe] { + cursor: pointer; +} +.track-item-compact[data-v-7a688bfe] .track-frame-start[data-v-7a688bfe].clickable[data-v-7a688bfe][data-v-7a688bfe]:hover, +.track-item-compact[data-v-7a688bfe] .track-frame-end[data-v-7a688bfe].clickable[data-v-7a688bfe][data-v-7a688bfe]:hover { + color: #80c6e8; + text-decoration: underline; +} +.track-item-compact[data-v-7a688bfe] .track-frame-end[data-v-7a688bfe] { + margin-right: 8px; +} +.track-item-compact[data-v-7a688bfe] .track-notes-wrapper[data-v-7a688bfe] { + position: relative; + display: flex; + align-items: center; + flex-grow: 1; + min-width: 60px; + max-width: 200px; + margin-left: 12px; +} +.track-item-compact[data-v-7a688bfe] .track-notes-edit-zone[data-v-7a688bfe] { + position: absolute; + left: 0; + top: 0; + bottom: 0; + width: 33%; + min-width: 20px; + max-width: 60px; + z-index: 1; +} +.track-item-compact[data-v-7a688bfe] .track-notes-edit-zone[data-v-7a688bfe].editable[data-v-7a688bfe] { + cursor: text; +} +.track-item-compact[data-v-7a688bfe] .track-notes-edit-zone[data-v-7a688bfe].editable[data-v-7a688bfe]:hover ~ .track-notes-compact[data-v-7a688bfe] { + color: #fff; + text-decoration: underline; +} +.track-item-compact[data-v-7a688bfe] .track-notes-compact[data-v-7a688bfe] { + font-size: 14px; + color: #666; + flex-grow: 1; + padding: 1px 4px; + pointer-events: none; +} +.track-item-compact[data-v-7a688bfe] .track-notes-compact[data-v-7a688bfe].has-notes[data-v-7a688bfe] { + color: #aaa; +} +.track-item-compact[data-v-7a688bfe] .compact-notes-input[data-v-7a688bfe] { + font-size: 14px; + flex-grow: 1; + min-width: 60px; + max-width: 200px; + background-color: #333; + border: 1px solid #666; + border-radius: 3px; + color: #fff; + padding: 1px 4px; + margin-left: 12px; + outline: none; +} +.track-item-compact[data-v-7a688bfe] .compact-notes-input[data-v-7a688bfe][data-v-7a688bfe]:focus { + border-color: #888; +} 
+.track-item-compact[data-v-7a688bfe] .track-type-compact[data-v-7a688bfe] { + font-size: 14px; + color: #aaa; + width: 80px; + min-width: 80px; + flex-shrink: 0; +} +.track-item-compact[data-v-7a688bfe] .track-type-compact[data-v-7a688bfe].editable[data-v-7a688bfe] { + cursor: text; +} +.track-item-compact[data-v-7a688bfe] .track-type-compact[data-v-7a688bfe].editable[data-v-7a688bfe][data-v-7a688bfe]:hover { + color: #fff; + text-decoration: underline; +} +.track-item-compact[data-v-7a688bfe] .track-confidence-compact[data-v-7a688bfe] { + font-size: 14px; + color: #888; + width: 40px; + min-width: 40px; + flex-shrink: 0; + text-align: right; + background-color: #333; + padding: 1px 4px; + border-radius: 3px; + margin-right: 8px; +} +.track-item-compact[data-v-7a688bfe] .track-confidence-compact[data-v-7a688bfe].editable[data-v-7a688bfe] { + cursor: text; +} +.track-item-compact[data-v-7a688bfe] .track-confidence-compact[data-v-7a688bfe].editable[data-v-7a688bfe][data-v-7a688bfe]:hover { + color: #fff; + background-color: #444; +} +.track-item-compact[data-v-7a688bfe] .compact-type-input[data-v-7a688bfe] { + font-size: 14px; + width: 80px; + min-width: 80px; + flex-shrink: 0; + background-color: #333; + border: 1px solid #666; + border-radius: 3px; + color: #fff; + padding: 1px 4px; + outline: none; +} +.track-item-compact[data-v-7a688bfe] .compact-type-input[data-v-7a688bfe][data-v-7a688bfe]:focus { + border-color: #888; +} +.track-item-compact[data-v-7a688bfe] .compact-select-input[data-v-7a688bfe] { + appearance: menulist; + background-color: #333; +} +.track-item-compact[data-v-7a688bfe] .compact-confidence-input[data-v-7a688bfe] { + font-size: 14px; + width: 46px; + min-width: 46px; + flex-shrink: 0; + background-color: #333; + border: 1px solid #666; + border-radius: 3px; + color: #fff; + padding: 1px 4px; + text-align: right; + outline: none; + /* Hide spinner buttons */ + -moz-appearance: textfield; +} +.track-item-compact[data-v-7a688bfe] 
.compact-confidence-input[data-v-7a688bfe][data-v-7a688bfe]:focus { + border-color: #888; +} +.track-item-compact[data-v-7a688bfe] .compact-confidence-input[data-v-7a688bfe][data-v-7a688bfe]::-webkit-outer-spin-button, .track-item-compact[data-v-7a688bfe] .compact-confidence-input[data-v-7a688bfe][data-v-7a688bfe]::-webkit-inner-spin-button { + -webkit-appearance: none; + margin: 0; +} +.track-item-compact[data-v-7a688bfe] .compact-actions[data-v-7a688bfe] { + flex-shrink: 0; +} + +.nowrap[data-v-a4da19c6] { + white-space: nowrap; + overflow: hidden; + max-width: var(--content-width); + text-overflow: ellipsis; +} + +.hover-show-parent[data-v-a4da19c6] .hover-show-child[data-v-a4da19c6] { + display: none; +} +.hover-show-parent[data-v-a4da19c6][data-v-a4da19c6]:hover .hover-show-child[data-v-a4da19c6] { + display: inherit; +} + +.outlined[data-v-a4da19c6] { + background-color: gray; + color: #222; + font-weight: 600; + border-radius: 6px; + padding: 0 5px; + font-size: 12px; +} + +.strcoller { + height: 100%; +} + +.trackHeader { + height: auto; +} + +.compact-header { + background-color: #262626; + border-bottom: 1px solid #444; + flex-shrink: 0; + min-height: 28px; +} + +.compact-header-text { + font-size: 14px; + font-weight: 600; +} + +.compact-column-headers .col-header { + font-size: 10px; + color: #888; + text-transform: uppercase; + font-weight: 500; +} +.compact-column-headers .col-header.sortable { + cursor: pointer; + user-select: none; +} +.compact-column-headers .col-header.sortable:hover { + color: #fff; +} +.compact-column-headers .col-header.sortable.active { + color: #80c6e8; +} +.compact-column-headers .col-header.sortable .v-icon { + vertical-align: middle; + margin-left: 1px; +} +.compact-column-headers .col-spacer { + /* Matches color box: 10px + 6px margin */ + min-width: 16px; +} +.compact-column-headers .col-id { + /* Matches trackNumber-compact: 30px + 8px margin */ + min-width: 38px; +} +.compact-column-headers .col-type { + /* Matches 
track-type-compact: 80px */ + min-width: 80px; +} +.compact-column-headers .col-conf { + /* Matches track-confidence-compact: 40px + 8px margin */ + min-width: 48px; + text-align: center; +} +.compact-column-headers .col-start { + /* Matches track-frame-start: 45px */ + min-width: 45px; + text-align: right; +} +.compact-column-headers .col-end { + /* Matches track-frame-end: 45px + 8px margin */ + min-width: 45px; + text-align: right; + margin-right: 8px; +} +.compact-column-headers .col-notes { + flex-grow: 1; + min-width: 60px; + margin-left: 12px; +} +.compact-column-headers .col-actions { + min-width: 100px; + text-align: right; +} + +.tracks { + overflow-y: auto; + overflow-x: hidden; +} +.tracks .v-input--checkbox label { + white-space: pre-wrap; +} + +.tracks-compact { + overflow-y: scroll; + overflow-x: hidden; + /* Always show scrollbar */ +} +.tracks-compact::-webkit-scrollbar { + width: 8px; +} +.tracks-compact::-webkit-scrollbar-track { + background: #1e1e1e; +} +.tracks-compact::-webkit-scrollbar-thumb { + background: #555; + border-radius: 4px; +} +.tracks-compact::-webkit-scrollbar-thumb:hover { + background: #666; +} + +.input-box { + border: 1px solid rgba(255, 255, 255, 0.15); + border-radius: 4px; + padding: 0 6px; + color: white; +} + +.trackNumber { + font-family: monospace; + max-width: 80px; + overflow: hidden; + white-space: nowrap; + text-overflow: ellipsis; +} +.trackNumber:hover { + cursor: pointer; + font-weight: bolder; + text-decoration: underline; +} + +.freeform-input[data-v-07a75698] { + width: 135px; +} + +.select-input[data-v-07a75698] { + width: 120px; + background-color: #1e1e1e; + appearance: menulist; +} \ No newline at end of file diff --git a/client/dive-common/apispec.ts b/client/dive-common/apispec.ts index 6aa73861d..5ed1b9c2a 100644 --- a/client/dive-common/apispec.ts +++ b/client/dive-common/apispec.ts @@ -207,6 +207,120 @@ function useApi() { return use>(ApiSymbol); } +/** + * Interactive Segmentation Types + */ 
+export interface SegmentationPredictRequest { + /** Path to the image file */ + imagePath: string; + /** Point coordinates as [x, y] pairs */ + points: [number, number][]; + /** Point labels: 1 for foreground, 0 for background */ + pointLabels: number[]; + /** Optional low-res mask from previous prediction for refinement */ + maskInput?: number[][]; + /** Whether to return multiple mask options */ + multimaskOutput?: boolean; +} + +export interface SegmentationPredictResponse { + /** Whether the prediction succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** Polygon coordinates as [x, y] pairs */ + polygon?: [number, number][]; + /** Bounding box [x_min, y_min, x_max, y_max] */ + bounds?: [number, number, number, number]; + /** Quality score from segmentation model */ + score?: number; + /** Low-res mask for subsequent refinement */ + lowResMask?: number[][]; + /** Mask dimensions [height, width] */ + maskShape?: [number, number]; +} + +export interface SegmentationStatusResponse { + /** Whether segmentation is available */ + available: boolean; + /** Whether the model is currently loaded */ + loaded?: boolean; + /** Whether the service is ready for predictions */ + ready?: boolean; +} + +/** + * Text Query Types for SAM3 open-vocabulary detection/segmentation + */ + +/** A single detection returned from a text query */ +export interface TextQueryDetection { + /** Bounding box [x1, y1, x2, y2] */ + box: [number, number, number, number]; + /** Polygon coordinates as [x, y] pairs */ + polygon?: [number, number][]; + /** Confidence score */ + score: number; + /** Label/class name (often the query text) */ + label: string; + /** Low-res mask for refinement (optional) */ + lowResMask?: number[][]; +} + +export interface TextQueryRequest { + /** Path to the image file */ + imagePath: string; + /** Text query describing what to find (e.g., "fish", "person swimming") */ + text: string; + /** Confidence threshold for detections 
(default: 0.3) */ + boxThreshold?: number; + /** Maximum number of detections to return (default: 10) */ + maxDetections?: number; + /** Optional boxes to refine [x1, y1, x2, y2][] */ + boxes?: [number, number, number, number][]; + /** Optional keypoints for refinement [x, y][] */ + points?: [number, number][]; + /** Labels for points: 1 for foreground, 0 for background */ + pointLabels?: number[]; + /** Optional masks to refine */ + masks?: number[][][]; +} + +export interface TextQueryResponse { + /** Whether the query succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** List of detections found */ + detections?: TextQueryDetection[]; + /** The original query text */ + query?: string; + /** Whether fallback method was used (no native text support) */ + fallback?: boolean; +} + +export interface RefineDetectionsRequest { + /** Path to the image file */ + imagePath: string; + /** Detections to refine */ + detections: TextQueryDetection[]; + /** Optional additional keypoints for refinement [x, y][] */ + points?: [number, number][]; + /** Labels for additional points: 1 for foreground, 0 for background */ + pointLabels?: number[]; + /** Whether to include refined masks in response */ + refineMasks?: boolean; +} + +export interface RefineDetectionsResponse { + /** Whether the refinement succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** Refined detections */ + detections?: TextQueryDetection[]; +} + export { provideApi, useApi, diff --git a/client/dive-common/recipes/segmentationpointclick.ts b/client/dive-common/recipes/segmentationpointclick.ts new file mode 100644 index 000000000..d20350861 --- /dev/null +++ b/client/dive-common/recipes/segmentationpointclick.ts @@ -0,0 +1,690 @@ +/** + * Segmentation Point-Click Recipe + * + * Allows users to click on objects to automatically generate segmentation + * masks using point-based segmentation models. 
+ * + * Usage: + * - Activate with 's' hotkey or Segment button + * - Left-click: Add foreground point (include in segmentation) + * - Shift+click or Middle-click: Add background point (exclude from segmentation) + * - Right-click: Confirm and lock the annotation + * - Enter: Confirm and commit the segmentation + * - Escape: Cancel and clear points + * + * Multi-frame support: + * - Points are tracked per-frame + * - When switching frames, previous frame's points are saved + * - Visual dots only show for the current frame + * - Confirming commits all frames with valid polygons + * + * Error handling: + * - If first point fails, the point is removed and the original error is emitted + * - If subsequent points fail, the point is rejected with message + * "Latest point rejected by segmentation method" + */ + +import Vue, { ref, Ref } from 'vue'; + +import Track from 'vue-media-annotator/track'; +import Recipe, { UpdateResponse } from 'vue-media-annotator/recipe'; +import { EditAnnotationTypes } from 'vue-media-annotator/layers'; +import { Mousetrap } from 'vue-media-annotator/types'; +import { SegmentationPredictRequest, SegmentationPredictResponse } from 'dive-common/apispec'; + +export const SegmentationPolygonKey = 'SegmentationPolygon'; + +const EmptyResponse: UpdateResponse = { + data: {}, + union: [], + unionWithoutBounds: [], +}; + +export interface SegmentationRecipeOptions { + /** + * Function to call segmentation predict API (platform-specific) + * @param request - The prediction request with points and labels + * @param frameNum - The current frame number (useful for web platform) + */ + predictFn: (request: SegmentationPredictRequest, frameNum: number) => Promise; + /** Function to get image path for current frame (used by desktop platform) */ + getImagePath: (frameNum: number) => string; + /** + * Optional function to initialize the segmentation service. + * Called when the recipe is activated (user clicks Segment button). + * Should throw an error if initialization fails.
+ */ + initializeServiceFn?: () => Promise; +} + +/** Callback data when prediction completes */ +export interface SegmentationPredictionResult { + polygon: [number, number][]; + bounds: [number, number, number, number] | null; + frameNum: number; +} + +/** Data stored per frame for multi-frame segmentation */ +interface FrameSegmentationData { + points: [number, number][]; + labels: number[]; + polygon: [number, number][] | null; + bounds: [number, number, number, number] | null; + lowResMask: number[][] | null; +} + +/** Result containing all frames for multi-frame confirmation */ +export interface MultiFrameSegmentationResult { + /** Map of frame number to segmentation result */ + frames: Map; +} + +/** + * Segmentation Point-Click Recipe + * + * This recipe captures point clicks and uses segmentation models to generate polygons. + */ +export default class SegmentationPointClick implements Recipe { + active: Ref; + + name: string; + + bus: Vue; + + toggleable: Ref; + + icon: Ref; + + /** Platform-specific segmentation predict function */ + private predictFn: ((request: SegmentationPredictRequest, frameNum: number) => Promise) | null = null; + + /** Function to get image path for current frame */ + private getImagePath: ((frameNum: number) => string) | null = null; + + /** Function to initialize the segmentation service (called on activation) */ + private initializeServiceFn: (() => Promise) | null = null; + + /** Whether the service has been successfully initialized */ + private serviceInitialized: boolean = false; + + /** Accumulated points for current frame's segmentation */ + private points: [number, number][] = []; + + /** Labels for accumulated points (1=foreground, 0=background) */ + private pointLabels: number[] = []; + + /** Low-res mask from last prediction (for refinement) */ + private lastLowResMask: number[][] | null = null; + + /** Pending polygon from async prediction */ + private pendingPolygon: [number, number][] | null = null; + + /** Pending 
bounds from async prediction */ + private pendingBounds: [number, number, number, number] | null = null; + + /** Whether a prediction is currently in progress */ + private isPredicting: boolean = false; + + /** Current frame number */ + private currentFrame: number = 0; + + /** Per-frame segmentation data for multi-frame support */ + private frameData: Map = new Map(); + + constructor() { + this.bus = new Vue(); + this.active = ref(false); + this.name = 'Segment'; + this.toggleable = ref(true); + this.icon = ref('mdi-auto-fix'); + } + + /** + * Initialize the recipe with platform-specific options. + * Must be called before using the recipe. + */ + initialize(options: SegmentationRecipeOptions): void { + this.predictFn = options.predictFn; + this.getImagePath = options.getImagePath; + this.initializeServiceFn = options.initializeServiceFn || null; + // Reset service initialization state when re-initializing + this.serviceInitialized = false; + } + + /** + * Reset the recipe state (clear accumulated points for all frames) + */ + private reset(): void { + this.points = []; + this.pointLabels = []; + this.lastLowResMask = null; + this.pendingPolygon = null; + this.pendingBounds = null; + this.isPredicting = false; + this.frameData.clear(); + // Clear visual feedback for points + this.bus.$emit('points-updated', { points: [], labels: [], frameNum: this.currentFrame }); + } + + /** + * Reset only the current frame's points (used when clearing current frame) + */ + private resetCurrentFrame(): void { + this.points = []; + this.pointLabels = []; + this.lastLowResMask = null; + this.pendingPolygon = null; + this.pendingBounds = null; + this.frameData.delete(this.currentFrame); + // Clear visual feedback for points + this.bus.$emit('points-updated', { points: [], labels: [], frameNum: this.currentFrame }); + } + + /** + * Save current frame's data to frameData map + */ + private saveCurrentFrameData(): void { + if (this.points.length > 0 || this.pendingPolygon) { + 
this.frameData.set(this.currentFrame, { + points: [...this.points], + labels: [...this.pointLabels], + polygon: this.pendingPolygon ? [...this.pendingPolygon] : null, + bounds: this.pendingBounds ? [...this.pendingBounds] as [number, number, number, number] : null, + lowResMask: this.lastLowResMask, + }); + } + } + + /** + * Load frame data from frameData map into current state + */ + private loadFrameData(frameNum: number): void { + const data = this.frameData.get(frameNum); + if (data) { + this.points = [...data.points]; + this.pointLabels = [...data.labels]; + this.pendingPolygon = data.polygon ? [...data.polygon] : null; + this.pendingBounds = data.bounds ? [...data.bounds] as [number, number, number, number] : null; + this.lastLowResMask = data.lowResMask; + } else { + this.points = []; + this.pointLabels = []; + this.pendingPolygon = null; + this.pendingBounds = null; + this.lastLowResMask = null; + } + } + + /** + * Handle frame change - save current frame's data and load new frame's data + * Emits event to clear visual dots when moving to a different frame + */ + handleFrameChange(newFrame: number): void { + if (!this.active.value) return; + if (newFrame === this.currentFrame) return; + + // Save current frame's data + this.saveCurrentFrameData(); + + // Update current frame + this.currentFrame = newFrame; + + // Load new frame's data (if any) + this.loadFrameData(newFrame); + + // Update visual feedback for the new frame + // If new frame has no points, this clears the display + this.bus.$emit('points-updated', { + points: [...this.points], + labels: [...this.pointLabels], + frameNum: newFrame, + }); + + // If new frame has a pending polygon, emit it + if (this.pendingPolygon) { + this.bus.$emit('prediction-ready', { + polygon: this.pendingPolygon, + bounds: this.pendingBounds, + frameNum: newFrame, + } as SegmentationPredictionResult); + } + } + + /** + * Make segmentation prediction with current points + * @param frameNum - The frame number to predict on 
+ * @param isFirstPoint - Whether this is the first point (affects error handling) + */ + private async makePrediction(frameNum: number, isFirstPoint: boolean = false): Promise { + if (!this.predictFn || !this.getImagePath) { + return; + } + + if (this.points.length === 0) { + return; + } + + this.isPredicting = true; + + try { + const imagePath = this.getImagePath(frameNum); + const request: SegmentationPredictRequest = { + imagePath, + points: this.points, + pointLabels: this.pointLabels, + maskInput: this.lastLowResMask ?? undefined, + multimaskOutput: this.points.length === 1, // Use multimask for single point + }; + + const response = await this.predictFn(request, frameNum); + + if (response.success && response.polygon && response.polygon.length > 0) { + this.pendingPolygon = response.polygon; + this.pendingBounds = response.bounds ?? null; + this.lastLowResMask = response.lowResMask ?? null; + + // Emit event to notify that prediction is ready + // Include frameNum so listeners can update the correct frame + this.bus.$emit('prediction-ready', { + polygon: response.polygon, + bounds: response.bounds, + score: response.score, + frameNum, + } as SegmentationPredictionResult & { score?: number }); + } else { + // Prediction returned an error - handle point rejection + this.handlePredictionError(response.error || 'Prediction failed', isFirstPoint, frameNum); + } + } catch (error) { + // Exception during prediction - handle point rejection + const errorMessage = error instanceof Error ? error.message : 'Prediction failed'; + this.handlePredictionError(errorMessage, isFirstPoint, frameNum); + } finally { + this.isPredicting = false; + } + } + + /** + * Handle prediction errors - remove rejected point and stay in edit mode. + * The user can manually reset or cancel if they want to start over. 
+   */
+  private handlePredictionError(originalError: string, isFirstPoint: boolean, frameNum: number): void {
+    // Remove the rejected point regardless of whether it was the first point
+    this.points.pop();
+    this.pointLabels.pop();
+
+    // Update icon to reflect new point count
+    // NOTE(review): update() only switches to the numeric icon when there are two or
+    // more points (`> 1`) while this path uses `> 0` - confirm which threshold is intended.
+    this.icon.value = this.points.length > 0
+      ? `mdi-numeric-${Math.min(this.points.length, 9)}-circle`
+      : 'mdi-auto-fix';
+
+    // Emit updated points to remove the rejected point from visual display
+    this.bus.$emit('points-updated', {
+      points: [...this.points],
+      labels: [...this.pointLabels],
+      frameNum,
+    });
+
+    // Show error message - the first point reports the underlying error,
+    // subsequent points get a generic rejection notice
+    if (isFirstPoint) {
+      this.bus.$emit('prediction-error', originalError);
+    } else {
+      this.bus.$emit('prediction-error', 'Latest point rejected by segmentation method');
+    }
+    // Stay in edit mode - let the user decide to reset or try again
+  }
+
+  /**
+   * Recipe update handler - called when user draws/clicks.
+   *
+   * Point features are accumulated as prompts and trigger an asynchronous prediction;
+   * in that case the response carries no geometry and `done: false`.
+   * Non-point data received in 'editing' mode commits the pending polygon (if any).
+   *
+   * @param mode - 'in-progress' or 'editing'
+   * @param frameNum - frame the update applies to
+   * @param track - unused
+   * @param data - features produced by the edit layer
+   * @param key - unused
+   * @returns EmptyResponse when inactive or when there is nothing to commit
+   */
+  update(
+    mode: 'in-progress' | 'editing',
+    frameNum: number,
+    // eslint-disable-next-line @typescript-eslint/no-unused-vars
+    track: Track,
+    data: GeoJSON.Feature[],
+    // eslint-disable-next-line @typescript-eslint/no-unused-vars
+    key?: string,
+  ): Readonly<UpdateResponse> {
+    // Only process if this recipe is active
+    if (!this.active.value) {
+      return EmptyResponse;
+    }
+
+    this.currentFrame = frameNum;
+
+    // Look for point features in the data
+    const pointFeatures = data.filter(
+      (d) => d.geometry.type === 'Point',
+    ) as GeoJSON.Feature<GeoJSON.Point>[];
+
+    // Handle Point clicks - Point mode emits 'editing' directly (not 'in-progress')
+    // because points complete immediately. We need to handle both modes.
+    if (pointFeatures.length > 0) {
+      const point = pointFeatures[0];
+      const coords = point.geometry.coordinates as [number, number];
+
+      // Check if this is the same point we already processed (avoid duplicates)
+      const lastPoint = this.points[this.points.length - 1];
+      const isDuplicate = lastPoint
+        && lastPoint[0] === coords[0]
+        && lastPoint[1] === coords[1];
+
+      if (!isDuplicate) {
+        // Track if this is the first point (for error handling)
+        const isFirstPoint = this.points.length === 0;
+
+        // Determine if this is a foreground or background point
+        // Check for shift key or middle-click via properties (if available)
+        const isBackground = point.properties?.background === true;
+        const label = isBackground ? 0 : 1;
+
+        // Add point to accumulator
+        this.points.push(coords);
+        this.pointLabels.push(label);
+
+        // Update icon to show point count
+        this.icon.value = this.points.length > 1
+          ? `mdi-numeric-${Math.min(this.points.length, 9)}-circle`
+          : 'mdi-auto-fix';
+
+        // Emit point update for visual feedback (green=foreground, red=background)
+        this.bus.$emit('points-updated', {
+          points: [...this.points],
+          labels: [...this.pointLabels],
+          frameNum,
+        });
+
+        // Trigger segmentation prediction asynchronously
+        // The prediction result will be handled by the event listener in Viewer
+        // isFirstPoint only selects which error message is shown on failure;
+        // a rejected point is always removed. makePrediction handles its own errors,
+        // so the returned promise is intentionally not awaited.
+        this.makePrediction(frameNum, isFirstPoint);
+      }
+
+      // For Point mode, we DON'T return polygon data here.
+      // The polygon will be set directly on the track when prediction completes
+      // via the 'prediction-ready' event handler in Viewer.vue.
+      // Return done: false to keep the track in edit mode.
+      return {
+        data: {},
+        union: [],
+        unionWithoutBounds: [],
+        done: false,
+      };
+    }
+
+    // If we're in editing mode with non-point data and have a pending polygon, commit it
+    if (mode === 'editing' && this.pendingPolygon && this.pendingPolygon.length > 2) {
+      const polygon: GeoJSON.Feature<GeoJSON.Polygon> = {
+        type: 'Feature',
+        geometry: {
+          type: 'Polygon',
+          coordinates: [this.pendingPolygon],
+        },
+        properties: {},
+      };
+
+      // Must be computed before reset() clears pendingBounds
+      const unionPolygon = this.pendingBounds
+        ? SegmentationPointClick.boundsToPolygon(this.pendingBounds)
+        : null;
+
+      // Clear state after committing
+      this.reset();
+      this.deactivate();
+
+      return {
+        data: {
+          [SegmentationPolygonKey]: [polygon],
+        },
+        union: unionPolygon ? [unionPolygon] : [],
+        unionWithoutBounds: [],
+        newSelectedKey: SegmentationPolygonKey,
+        done: true,
+      };
+    }
+
+    return EmptyResponse;
+  }
+
+  /**
+   * Convert bounds to a GeoJSON Polygon for union calculation
+   *
+   * @param bounds - [minX, minY, maxX, maxY]
+   * @returns a closed 5-vertex rectangular ring (first vertex repeated at the end)
+   */
+  private static boundsToPolygon(bounds: [number, number, number, number]): GeoJSON.Polygon {
+    const [minX, minY, maxX, maxY] = bounds;
+    return {
+      type: 'Polygon',
+      coordinates: [[
+        [minX, minY],
+        [maxX, minY],
+        [maxX, maxY],
+        [minX, maxY],
+        [minX, minY],
+      ]],
+    };
+  }
+
+  /**
+   * Handle deletion of segmentation geometry.
+   * Only acts on the segmentation polygon key; other keys/types are ignored.
+   */
+  delete(frame: number, track: Track, key: string, type: EditAnnotationTypes): void {
+    if (key === SegmentationPolygonKey && type === 'Polygon') {
+      track.removeFeatureGeometry(frame, { type: 'Polygon', key: SegmentationPolygonKey });
+      this.reset();
+    }
+  }
+
+  /**
+   * Handle point deletion (not applicable for segmentation, but required by interface)
+   */
+  deletePoint(
+    frame: number,
+    track: Track,
+    idx: number,
+    key: string,
+    type: EditAnnotationTypes,
+  ): void {
+    // Segmentation doesn't support individual point deletion within a polygon
+    // If needed, delete the whole polygon
+    if (key === SegmentationPolygonKey && type === 'Polygon') {
+      this.delete(frame, track, key, type);
+    }
+  }
+
+  /** True while activate() is waiting on initializeServiceFn; blocks re-entrant activation. */
+  private activationPending = false;
+
+  /**
+   * Activate the segmentation recipe.
+   * If an initializeServiceFn was provided, it will be called first to ensure
+   * the segmentation service is ready. If initialization fails, the recipe will not
+   * activate and an error event will be emitted.
+   * Calls made while an initialization is still pending are ignored, so repeated
+   * triggers (e.g. the 's' shortcut) cannot start a second initialization.
+   */
+  activate(): void {
+    // If we have an initialization function and haven't initialized yet, do it now
+    if (this.initializeServiceFn && !this.serviceInitialized) {
+      if (this.activationPending) {
+        return;
+      }
+      this.activationPending = true;
+
+      // Show loading state
+      this.icon.value = 'mdi-loading';
+
+      this.initializeServiceFn()
+        .then(() => {
+          this.activationPending = false;
+          this.serviceInitialized = true;
+          this.completeActivation();
+        })
+        .catch((error) => {
+          this.activationPending = false;
+          const errorMessage = error instanceof Error ? error.message : 'Unable to load segmentation module';
+          this.bus.$emit('prediction-error', errorMessage);
+          this.icon.value = 'mdi-auto-fix';
+          // Don't activate - stay in previous mode
+        });
+    } else {
+      // No initialization function or already initialized - activate immediately
+      this.completeActivation();
+    }
+  }
+
+  /**
+   * Complete the activation after service is ready
+   */
+  private completeActivation(): void {
+    this.active.value = true;
+    this.reset();
+    this.icon.value = 'mdi-auto-fix';
+
+    // Emit activation event to trigger Point editing mode
+    this.bus.$emit('activate', {
+      editing: 'Point' as EditAnnotationTypes,
+      key: SegmentationPolygonKey,
+      recipeName: this.name,
+    });
+  }
+
+  /**
+   * Deactivate the segmentation recipe
+   */
+  deactivate(): void {
+    this.active.value = false;
+    this.reset();
+    this.icon.value = 'mdi-auto-fix';
+
+    // Emit empty points to clear the visual points layer
+    this.bus.$emit('points-updated', {
+      points: [],
+      labels: [],
+      frameNum: this.currentFrame,
+    });
+  }
+
+  /**
+   * Check if there's a pending prediction that can be confirmed (current frame or any saved frame)
+   */
+  hasPendingPrediction(): boolean {
+    // Check current frame
+    if (this.pendingPolygon !== null && this.pendingPolygon.length > 2) {
+      return true;
+    }
+    // Check saved frames
+    return Array.from(this.frameData.values()).some(
+      (data) => data.polygon && data.polygon.length > 2,
+    );
+  }
+
+  /**
+   * Check if there are any points accumulated (current frame or any saved frame)
+   */
+  hasPoints(): boolean {
+    // Check current frame
+    if (this.points.length > 0) {
+      return true;
+    }
+    // Check saved frames
+    return Array.from(this.frameData.values()).some(
+      (data) => data.points.length > 0,
+    );
+  }
+
+  /**
+   * Get the number of frames with pending predictions.
+   * Note: this saves the current frame's data into frameData before counting.
+   */
+  getFrameCount(): number {
+    // Save current frame data first
+    this.saveCurrentFrameData();
+
+    return Array.from(this.frameData.values()).filter(
+      (data) => data.polygon && data.polygon.length > 2,
+    ).length;
+  }
+
+  /**
+   * Public method to reset (clear) all accumulated points and pending prediction.
+   * Called from UI Reset button. Clears all frames.
+   */
+  resetPoints(): void {
+    // Emit reset event for all frames with data. The current frame may also be a key
+    // of frameData, so collect into a Set to emit exactly once per frame.
+    const framesToReset = new Set<number>([this.currentFrame, ...this.frameData.keys()]);
+    framesToReset.forEach((frameNum) => {
+      this.bus.$emit('prediction-reset', { frameNum });
+    });
+    this.reset();
+    this.icon.value = 'mdi-auto-fix';
+  }
+
+  /**
+   * Public method to confirm the current prediction and emit it for track update.
+   * Called from UI Confirm button. Confirms all frames with valid polygons.
+   * Does nothing when no frame has a polygon with more than two vertices.
+   */
+  confirmPrediction(): void {
+    // Save current frame data to frameData map
+    this.saveCurrentFrameData();
+
+    // Collect all frames with valid polygons
+    const confirmedFrames: Map<number, SegmentationPredictionResult> = new Map();
+
+    Array.from(this.frameData.entries()).forEach(([frameNum, data]) => {
+      if (data.polygon && data.polygon.length > 2) {
+        confirmedFrames.set(frameNum, {
+          polygon: data.polygon,
+          bounds: data.bounds,
+          frameNum,
+        });
+      }
+    });
+
+    if (confirmedFrames.size === 0) {
+      return;
+    }
+
+    // Emit multi-frame confirmed event
+    this.bus.$emit('prediction-confirmed-multi', {
+      frames: confirmedFrames,
+    } as MultiFrameSegmentationResult);
+
+    // Also emit single-frame events for backward compatibility
+    // (in case only single-frame handler is registered)
+    // NOTE(review): a consumer that registers both handlers receives every frame twice - confirm
+    // that listeners subscribe to only one of the two events.
+    Array.from(confirmedFrames.values()).forEach((result) => {
+      this.bus.$emit('prediction-confirmed', result);
+    });
+
+    // Reset state and deactivate
+    this.reset();
+    this.deactivate();
+  }
+
+  /**
+   * Implements the Recipe interface's confirm method.
+   * Called when right-click is used to lock the annotation.
+ */ + confirm(): void { + if (this.active.value && this.hasPendingPrediction()) { + this.confirmPrediction(); + } + } + + /** + * Keyboard shortcuts for segmentation recipe + */ + mousetrap(): Mousetrap[] { + return [ + { + bind: 's', + handler: () => { + if (!this.active.value) { + this.activate(); + } + }, + }, + { + bind: 'escape', + handler: () => { + if (this.active.value) { + this.resetPoints(); + } + }, + }, + { + bind: 'enter', + handler: () => { + if (this.active.value && this.hasPendingPrediction()) { + this.confirmPrediction(); + } + }, + }, + ]; + } +} diff --git a/client/dive-common/use/useModeManager.ts b/client/dive-common/use/useModeManager.ts index aaeb0db6a..c42300ae2 100644 --- a/client/dive-common/use/useModeManager.ts +++ b/client/dive-common/use/useModeManager.ts @@ -2,7 +2,7 @@ import { computed, Ref, reactive, ref, onBeforeUnmount, toRef, } from 'vue'; import { uniq, flatMapDeep, flattenDeep } from 'lodash'; -import Track, { TrackId } from 'vue-media-annotator/track'; +import Track, { TrackId, TrackSupportedFeature } from 'vue-media-annotator/track'; import { RectBounds, updateBounds } from 'vue-media-annotator/utils'; import { EditAnnotationTypes, VisibleAnnotationTypes } from 'vue-media-annotator/layers'; import { AggregateMediaController } from 'vue-media-annotator/components/annotators/mediaControllerType'; @@ -414,6 +414,37 @@ export default function useModeManager({ } } + /** + * Set a feature on a track with proper interpolation handling. + * This is used by segmentation and other modes that need to set features + * while respecting track settings and interpolation logic. 
+ */ + function handleSetTrackFeature( + frameNum: number, + bounds: RectBounds, + geometry: GeoJSON.Feature[], + runAfterLogic: boolean = true, + ) { + if (selectedTrackId.value !== null) { + const track = cameraStore.getPossibleTrack(selectedTrackId.value, selectedCamera.value); + if (track) { + const { interpolate } = track.canInterpolate(frameNum); + + track.setFeature({ + frame: frameNum, + flick: 0, + bounds, + keyframe: true, + interpolate: _shouldInterpolate(interpolate), + }, geometry); + + if (runAfterLogic) { + newTrackSettingsAfterLogic(track); + } + } + } + } + function handleUpdateGeoJSON( eventType: 'in-progress' | 'editing', frameNum: number, @@ -703,6 +734,18 @@ export default function useModeManager({ } } + /** + * Confirm the current annotation for any active recipe that supports it. + * Called when right-click is used in Point mode to lock the annotation. + */ + function handleConfirmRecipe() { + recipes.forEach((r) => { + if (r.active.value && r.confirm) { + r.confirm(); + } + }); + } + /** * Merge: Enabled whenever there are candidates in the merge list */ @@ -823,6 +866,7 @@ export default function useModeManager({ selectNextTrack, handler: { commitMerge: handleCommitMerge, + confirmRecipe: handleConfirmRecipe, groupAdd: handleAddGroup, deleteSelectedTracks: handleDeleteSelectedTracks, groupEdit: handleGroupEdit, @@ -833,6 +877,7 @@ export default function useModeManager({ trackSeek: handleTrackClick, trackSelect: handleSelectTrack, trackSelectNext: handleSelectNext, + setTrackFeature: handleSetTrackFeature, updateRectBounds: handleUpdateRectBounds, updateGeoJSON: handleUpdateGeoJSON, removeTrack: handleRemoveTrack, diff --git a/client/platform/desktop/backend/ipcService.ts b/client/platform/desktop/backend/ipcService.ts index 3f11cf3b7..f8b91f2d0 100644 --- a/client/platform/desktop/backend/ipcService.ts +++ b/client/platform/desktop/backend/ipcService.ts @@ -20,6 +20,11 @@ import * as common from './native/common'; import 
beginMultiCamImport from './native/multiCamImport'; import settings from './state/settings'; import { listen } from './server'; +import { + getSegmentationServiceManager, + shutdownSegmentationService, + SegmentationPredictRequest, +} from './native/segmentation'; // defaults to linux if win32 doesn't exist const currentPlatform = OS.platform() === 'win32' ? win32 : linux; @@ -171,4 +176,99 @@ export default function register() { }; return currentPlatform.train(settings.get(), args, updater); }); + + /** + * Interactive Segmentation Service + */ + + ipcMain.handle('segmentation-initialize', async () => { + const segService = getSegmentationServiceManager(); + await segService.initialize(settings.get()); + return { success: true }; + }); + + ipcMain.handle('segmentation-predict', async (_, args: SegmentationPredictRequest) => { + const segService = getSegmentationServiceManager(); + + // Auto-initialize if not ready + if (!segService.isReady()) { + await segService.initialize(settings.get()); + } + + const response = await segService.predict(args); + return response; + }); + + ipcMain.handle('segmentation-set-image', async (_, imagePath: string) => { + const segService = getSegmentationServiceManager(); + + if (!segService.isReady()) { + await segService.initialize(settings.get()); + } + + await segService.setImage(imagePath); + return { success: true }; + }); + + ipcMain.handle('segmentation-clear-image', async () => { + const segService = getSegmentationServiceManager(); + + if (segService.isReady()) { + await segService.clearImage(); + } + return { success: true }; + }); + + ipcMain.handle('segmentation-shutdown', async () => { + await shutdownSegmentationService(); + return { success: true }; + }); + + ipcMain.handle('segmentation-is-ready', () => { + const segService = getSegmentationServiceManager(); + return { ready: segService.isReady() }; + }); + + ipcMain.handle('segmentation-text-query', async (_, args: { + imagePath: string; + text: string; + 
boxThreshold?: number; + maxDetections?: number; + boxes?: [number, number, number, number][]; + points?: [number, number][]; + pointLabels?: number[]; + }) => { + const segService = getSegmentationServiceManager(); + + // Auto-initialize if not ready + if (!segService.isReady()) { + await segService.initialize(settings.get()); + } + + const response = await segService.textQuery(args); + return response; + }); + + ipcMain.handle('segmentation-refine', async (_, args: { + imagePath: string; + detections: { + box: [number, number, number, number]; + polygon?: [number, number][]; + score: number; + label: string; + }[]; + points?: [number, number][]; + pointLabels?: number[]; + refineMasks?: boolean; + }) => { + const segService = getSegmentationServiceManager(); + + // Auto-initialize if not ready + if (!segService.isReady()) { + await segService.initialize(settings.get()); + } + + const response = await segService.refineDetections(args); + return response; + }); } diff --git a/client/platform/desktop/backend/native/segmentation.ts b/client/platform/desktop/backend/native/segmentation.ts new file mode 100644 index 000000000..cda5e58b0 --- /dev/null +++ b/client/platform/desktop/backend/native/segmentation.ts @@ -0,0 +1,529 @@ +/** + * Interactive Segmentation Service Manager for Desktop + * + * Manages a persistent Python subprocess that keeps the segmentation model loaded in memory + * for fast interactive segmentation from point clicks. 
+ */ + +import { spawn, ChildProcess } from 'child_process'; +import npath from 'path'; +import readline from 'readline'; +import { EventEmitter } from 'events'; +import { Settings } from 'platform/desktop/constants'; +import { observeChild } from './processManager'; + +/** Error message shown to users when segmentation fails to load */ +export const SEGMENTATION_LOAD_ERROR_MESSAGE = 'Unable to load segmentation module'; + +/** Request to the segmentation service */ +export interface SegmentationInternalPredictRequest { + /** Unique request ID for correlation */ + id: string; + /** Path to the image file */ + imagePath: string; + /** Point coordinates as [x, y] pairs */ + points: [number, number][]; + /** Point labels: 1 for foreground, 0 for background */ + pointLabels: number[]; + /** Optional low-res mask from previous prediction for refinement */ + maskInput?: number[][]; + /** Whether to return multiple mask options */ + multimaskOutput?: boolean; +} + +/** Response from the segmentation service */ +export interface SegmentationInternalPredictResponse { + /** Request ID for correlation */ + id: string; + /** Whether the prediction succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** Polygon coordinates as [x, y] pairs */ + polygon?: [number, number][]; + /** Bounding box [x_min, y_min, x_max, y_max] */ + bounds?: [number, number, number, number]; + /** Quality score from segmentation model */ + score?: number; + /** Low-res mask for subsequent refinement */ + lowResMask?: number[][]; + /** Mask dimensions [height, width] */ + maskShape?: [number, number]; +} + +interface PendingRequest { + resolve: (response: SegmentationInternalPredictResponse) => void; + reject: (error: Error) => void; + timeout: NodeJS.Timeout; +} + +/** + * Segmentation Service Manager + * + * Manages a persistent Python subprocess for interactive segmentation inference. + * The service is started on-demand and kept alive for the session. 
+ */
+export class SegmentationServiceManager extends EventEmitter {
+  private process: ChildProcess | null = null;
+
+  private readline: readline.Interface | null = null;
+
+  private pendingRequests: Map<string, PendingRequest> = new Map();
+
+  private isInitializing = false;
+
+  private initPromise: Promise<void> | null = null;
+
+  private settings: Settings | null = null;
+
+  private requestCounter = 0;
+
+  /** True once the current child process has reported that the model finished loading. */
+  private modelLoaded = false;
+
+  private readonly requestTimeoutMs = 30000; // 30 second timeout
+
+  private readonly initTimeoutMs = 60000; // 60 seconds for model loading
+
+  /**
+   * Initialize the segmentation service with the given settings.
+   * This spawns the Python process and loads the segmentation model.
+   * The model stays loaded for the entire session to avoid reload delays.
+   *
+   * Concurrent callers share the in-flight initialization. Rejects with
+   * SEGMENTATION_LOAD_ERROR_MESSAGE when the process exits, errors, or times out
+   * before reporting that the model is loaded.
+   */
+  async initialize(settings: Settings): Promise<void> {
+    // If already initialized and running, return immediately
+    // This keeps the model loaded between activations
+    if (this.isReady()) {
+      console.log('[Segmentation] Service already running, skipping initialization');
+      return undefined;
+    }
+
+    // If currently initializing, wait for it
+    if (this.isInitializing && this.initPromise) {
+      await this.initPromise;
+      return undefined;
+    }
+
+    this.isInitializing = true;
+    this.settings = settings;
+
+    this.initPromise = this._doInitialize(settings);
+    try {
+      await this.initPromise;
+    } finally {
+      this.isInitializing = false;
+    }
+    return undefined;
+  }
+
+  /**
+   * Spawn the Python service and resolve once it reports a loaded model.
+   * Every handler is bound to the specific child it was registered on, so events
+   * from an older child can never tear down the state of a newer one.
+   */
+  private async _doInitialize(settings: Settings): Promise<void> {
+    // Clean up any existing process
+    await this.shutdown();
+
+    return new Promise<void>((resolve, reject) => {
+      const viameSetup = npath.join(settings.viamePath, 'setup_viame.sh');
+
+      // Build the command to run the segmentation service via Python module import
+      // This avoids absolute path issues and uses the installed viame.pytorch module
+      // NOTE(review): viamePath is interpolated into a shell command line; a path containing
+      // a double quote would break the quoting - confirm the setting is validated upstream.
+      const command = [
+        `. "${viameSetup}"`,
+        '&&',
+        'python -m viame.pytorch.sam2_interactive',
+        `--viame-path "${settings.viamePath}"`,
+        '--device cuda',
+      ].join(' ');
+
+      console.log('[Segmentation] Starting interactive segmentation service...');
+      console.log(`[Segmentation] Command: ${command}`);
+
+      const proc = observeChild(spawn(command, {
+        shell: '/bin/bash',
+        stdio: ['pipe', 'pipe', 'pipe'],
+      }));
+      this.process = proc;
+      this.modelLoaded = false;
+
+      // Settle the promise exactly once and always release the init timer
+      let settled = false;
+      let initTimer: NodeJS.Timeout | null = null;
+      const settle = (error?: Error) => {
+        if (settled) {
+          return;
+        }
+        settled = true;
+        if (initTimer) {
+          clearTimeout(initTimer);
+        }
+        if (error) {
+          reject(error);
+        } else {
+          resolve();
+        }
+      };
+
+      // Set up readline for stdout (JSON responses)
+      if (proc.stdout) {
+        const lineReader = readline.createInterface({
+          input: proc.stdout,
+          crlfDelay: Infinity,
+        });
+        lineReader.on('line', (line) => {
+          this.handleResponse(line);
+        });
+        this.readline = lineReader;
+      }
+
+      // Log stderr (diagnostic messages)
+      if (proc.stderr) {
+        proc.stderr.on('data', (data: Buffer) => {
+          const message = data.toString().trim();
+          if (message) {
+            console.log(`[Segmentation] ${message}`);
+            // Detect successful initialization
+            if (
+              !settled
+              && this.process === proc
+              && message.includes('SAM2 model initialized successfully')
+            ) {
+              this.modelLoaded = true;
+              settle();
+            }
+          }
+        });
+      }
+
+      // Handle process exit
+      proc.on('exit', (code, signal) => {
+        console.log(`[Segmentation] Process exited with code ${code}, signal ${signal}`);
+        if (this.process === proc) {
+          this.cleanup();
+        }
+        settle(new Error(SEGMENTATION_LOAD_ERROR_MESSAGE));
+      });
+
+      proc.on('error', (err) => {
+        console.error('[Segmentation] Process error:', err);
+        if (this.process === proc) {
+          this.cleanup();
+        }
+        settle(new Error(SEGMENTATION_LOAD_ERROR_MESSAGE));
+      });
+
+      // Timeout for initialization (60 seconds for model loading)
+      initTimer = setTimeout(() => {
+        if (settled) {
+          return;
+        }
+        settle(new Error(SEGMENTATION_LOAD_ERROR_MESSAGE));
+        // Do not leave a half-started process behind; its exit handler runs cleanup()
+        if (this.process === proc) {
+          proc.kill('SIGTERM');
+        }
+      }, this.initTimeoutMs);
+    });
+  }
+
+  /**
+   * Check if the service is ready for requests: the child process is alive
+   * and has reported that the model finished loading.
+   */
+  isReady(): boolean {
+    return this.process !== null && this.process.exitCode === null && this.modelLoaded;
+  }
+
+  /**
+   * Generate a unique request ID
+   */
+  private generateRequestId(): string {
+    this.requestCounter += 1;
+    return `req_${Date.now()}_${this.requestCounter}`;
+  }
+
+  /**
+   * Handle a response line from the segmentation service.
+   * Lines that are not valid JSON or that carry an unknown id are logged and dropped.
+   */
+  private handleResponse(line: string): void {
+    try {
+      const response = JSON.parse(line) as SegmentationInternalPredictResponse;
+      const pending = this.pendingRequests.get(response.id);
+
+      if (pending) {
+        clearTimeout(pending.timeout);
+        this.pendingRequests.delete(response.id);
+        pending.resolve(response);
+      } else {
+        console.warn(`[Segmentation] Received response for unknown request: ${response.id}`);
+      }
+    } catch (err) {
+      console.error('[Segmentation] Failed to parse response:', line, err);
+    }
+  }
+
+  /**
+   * Write one JSON-line request to the service and wait for the response with the same id.
+   * Rejects when stdin is unavailable, when the write fails, when no response arrives
+   * within requestTimeoutMs, or when the process terminates first (see cleanup()).
+   *
+   * @param payload - request fields without `id`; the id is generated here
+   * @param timeoutMessage - error message used if the request times out
+   */
+  private sendRequest(
+    payload: Record<string, unknown>,
+    timeoutMessage: string,
+  ): Promise<SegmentationInternalPredictResponse> {
+    const stdin = this.process?.stdin;
+    if (!stdin) {
+      return Promise.reject(new Error('Segmentation service stdin is not available'));
+    }
+
+    const id = this.generateRequestId();
+    return new Promise((resolve, reject) => {
+      const timeout = setTimeout(() => {
+        this.pendingRequests.delete(id);
+        reject(new Error(timeoutMessage));
+      }, this.requestTimeoutMs);
+
+      this.pendingRequests.set(id, { resolve, reject, timeout });
+
+      // Send the request as JSON line
+      stdin.write(`${JSON.stringify({ id, ...payload })}\n`, (err) => {
+        if (err) {
+          clearTimeout(timeout);
+          this.pendingRequests.delete(id);
+          reject(err);
+        }
+      });
+    });
+  }
+
+  /**
+   * Send a predict request to the segmentation service
+   */
+  async predict(
+    request: Omit<SegmentationInternalPredictRequest, 'id'>,
+  ): Promise<SegmentationInternalPredictResponse> {
+    if (!this.isReady()) {
+      throw new Error('Segmentation service is not ready. Call initialize() first.');
+    }
+
+    return this.sendRequest({
+      command: 'predict',
+      image_path: request.imagePath,
+      points: request.points,
+      point_labels: request.pointLabels,
+      mask_input: request.maskInput,
+      multimask_output: request.multimaskOutput ?? false,
+    }, `Segmentation predict request timed out after ${this.requestTimeoutMs}ms`);
+  }
+
+  /**
+   * Pre-load an image for multiple predictions (optional optimization)
+   * NOTE(review): the response body is ignored, so a `success: false` reply is not
+   * surfaced to the caller - confirm whether set_image can report failures.
+   */
+  async setImage(imagePath: string): Promise<void> {
+    if (!this.isReady()) {
+      throw new Error('Segmentation service is not ready');
+    }
+
+    await this.sendRequest({
+      command: 'set_image',
+      image_path: imagePath,
+    }, 'set_image request timed out');
+  }
+
+  /**
+   * Clear the cached image
+   */
+  async clearImage(): Promise<void> {
+    if (!this.isReady()) {
+      return undefined; // Nothing to clear
+    }
+
+    await this.sendRequest({ command: 'clear_image' }, 'clear_image request timed out');
+    return undefined;
+  }
+
+  /**
+   * Send a text query request for open-vocabulary detection/segmentation
+   */
+  async textQuery(request: {
+    imagePath: string;
+    text: string;
+    boxThreshold?: number;
+    maxDetections?: number;
+    boxes?: [number, number, number, number][];
+    points?: [number, number][];
+    pointLabels?: number[];
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  }): Promise<any> {
+    if (!this.isReady()) {
+      throw new Error('Segmentation service is not ready. Call initialize() first.');
+    }
+
+    return this.sendRequest({
+      command: 'text_query',
+      image_path: request.imagePath,
+      text: request.text,
+      box_threshold: request.boxThreshold ?? 0.3,
+      max_detections: request.maxDetections ?? 10,
+      boxes: request.boxes,
+      points: request.points,
+      point_labels: request.pointLabels,
+    }, `Text query request timed out after ${this.requestTimeoutMs}ms`);
+  }
+
+  /**
+   * Refine existing detections with additional prompts
+   */
+  async refineDetections(request: {
+    imagePath: string;
+    detections: {
+      box: [number, number, number, number];
+      polygon?: [number, number][];
+      score: number;
+      label: string;
+    }[];
+    points?: [number, number][];
+    pointLabels?: number[];
+    refineMasks?: boolean;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  }): Promise<any> {
+    if (!this.isReady()) {
+      throw new Error('Segmentation service is not ready. Call initialize() first.');
+    }
+
+    return this.sendRequest({
+      command: 'refine',
+      image_path: request.imagePath,
+      detections: request.detections,
+      points: request.points,
+      point_labels: request.pointLabels,
+      refine_masks: request.refineMasks ?? true,
+    }, `Refine request timed out after ${this.requestTimeoutMs}ms`);
+  }
+
+  /**
+   * Clean up internal state after process exits.
+   * Safe to call more than once: 'shutdown' is emitted only when a process was still attached.
+   */
+  private cleanup(): void {
+    // Reject all pending requests
+    this.pendingRequests.forEach((pending) => {
+      clearTimeout(pending.timeout);
+      pending.reject(new Error('Segmentation service terminated'));
+    });
+    this.pendingRequests.clear();
+
+    if (this.readline) {
+      this.readline.close();
+      this.readline = null;
+    }
+
+    const hadProcess = this.process !== null;
+    this.process = null;
+    this.modelLoaded = false;
+    if (hadProcess) {
+      this.emit('shutdown');
+    }
+  }
+
+  /**
+   * Gracefully shutdown the segmentation service.
+   * Sends a 'shutdown' command, then waits for the process to exit; after 5 seconds
+   * the process is sent SIGTERM and the manager state is released regardless.
+   */
+  async shutdown(): Promise<void> {
+    const proc = this.process;
+    if (!proc) {
+      return undefined;
+    }
+
+    console.log('[Segmentation] Shutting down segmentation service...');
+
+    await new Promise<void>((resolve) => {
+      let finished = false;
+      let forceKillTimer: NodeJS.Timeout | null = null;
+      const finish = () => {
+        if (finished) {
+          return;
+        }
+        finished = true;
+        if (forceKillTimer) {
+          clearTimeout(forceKillTimer);
+        }
+        // Only release state that still belongs to the process being shut down
+        if (this.process === proc) {
+          this.cleanup();
+        }
+        resolve();
+      };
+
+      proc.once('exit', finish);
+
+      // Wait for process to exit or timeout
+      forceKillTimer = setTimeout(() => {
+        console.log('[Segmentation] Force killing segmentation service...');
+        proc.kill('SIGTERM');
+        finish();
+      }, 5000);
+
+      // Send shutdown command
+      const request = { id: this.generateRequestId(), command: 'shutdown' };
+      if (proc.stdin?.writable) {
+        proc.stdin.write(`${JSON.stringify(request)}\n`);
+      }
+    });
+    return undefined;
+  }
+}
+
+// Singleton instance
+let serviceManager: SegmentationServiceManager | null = null;
+
+/**
+ * Get the segmentation service manager singleton
+ */
+export function getSegmentationServiceManager(): SegmentationServiceManager {
+  if (!serviceManager) {
+    serviceManager = new SegmentationServiceManager();
+  }
+  return serviceManager;
+}
+
+/**
+ * Shutdown the segmentation service (call on app close)
+ */
+export async function shutdownSegmentationService(): Promise<void> {
+  if (serviceManager) {
+    await serviceManager.shutdown();
+    serviceManager = null;
+  }
+}
+
+// Export type aliases for generic naming
+export type SegmentationPredictRequest = Omit<SegmentationInternalPredictRequest, 'id'>;
+export type SegmentationPredictResponse = SegmentationInternalPredictResponse;
diff --git a/client/platform/desktop/frontend/api.ts b/client/platform/desktop/frontend/api.ts index fb71cdd3b..50ae595b5 100644 --- a/client/platform/desktop/frontend/api.ts +++ b/client/platform/desktop/frontend/api.ts @@ -10,6 +10,8 @@ import type { DatasetMetaMutable, DatasetType, MultiCamImportArgs, Pipe, Pipelines, SaveAttributeArgs, SaveAttributeTrackFilterArgs, SaveDetectionsArgs, TrainingConfigs, + SegmentationPredictRequest, SegmentationPredictResponse, SegmentationStatusResponse, + TextQueryRequest, TextQueryResponse, RefineDetectionsRequest, RefineDetectionsResponse, } from 'dive-common/apispec'; import { @@ -205,6 +207,47 @@ async function cancelJob(job: DesktopJob): Promise { return ipcRenderer.invoke('cancel-job', job); } +/** + * Interactive Segmentation API + */ + +async function segmentationInitialize(): Promise<{ success: boolean }> { + return ipcRenderer.invoke('segmentation-initialize'); +} + +async function
segmentationPredict(request: SegmentationPredictRequest): Promise { + return ipcRenderer.invoke('segmentation-predict', request); +} + +async function segmentationSetImage(imagePath: string): Promise<{ success: boolean }> { + return ipcRenderer.invoke('segmentation-set-image', imagePath); +} + +async function segmentationClearImage(): Promise<{ success: boolean }> { + return ipcRenderer.invoke('segmentation-clear-image'); +} + +async function segmentationShutdown(): Promise<{ success: boolean }> { + return ipcRenderer.invoke('segmentation-shutdown'); +} + +async function segmentationIsReady(): Promise { + return ipcRenderer.invoke('segmentation-is-ready'); +} + +/** + * Text Query API + * Allows open-vocabulary detection and segmentation using text prompts + */ + +async function textQuery(request: TextQueryRequest): Promise { + return ipcRenderer.invoke('segmentation-text-query', request); +} + +async function refineDetections(request: RefineDetectionsRequest): Promise { + return ipcRenderer.invoke('segmentation-refine', request); +} + /** * REST api for larger-body messages */ @@ -277,4 +320,14 @@ export { openLink, nvidiaSmi, cancelJob, + /* Segmentation APIs */ + segmentationInitialize, + segmentationPredict, + segmentationSetImage, + segmentationClearImage, + segmentationShutdown, + segmentationIsReady, + /* Text Query APIs */ + textQuery, + refineDetections, }; diff --git a/client/platform/desktop/frontend/components/ViewerLoader.vue b/client/platform/desktop/frontend/components/ViewerLoader.vue index 94ca70d8c..62cc67eb3 100644 --- a/client/platform/desktop/frontend/components/ViewerLoader.vue +++ b/client/platform/desktop/frontend/components/ViewerLoader.vue @@ -1,6 +1,7 @@ + + From caf10f6931a9ff2b307fadd3d9f7c3d9bcf71b4e Mon Sep 17 00:00:00 2001 From: Matt Dawkins Date: Fri, 30 Jan 2026 12:26:43 -0500 Subject: [PATCH 19/30] Fix missing isStereoDataset prop in horizontal and bottom sidebar layouts The stereo interactive mode toggle was only visible in the 
vertical sidebar layout. The horizontal (Sidebar.vue) and bottom (Viewer.vue) layouts were not passing the isStereoDataset prop to TrackSettingsPanel. Co-Authored-By: Claude Opus 4.5 --- client/dive-common/components/Sidebar.vue | 208 +++++++- client/dive-common/components/Viewer.vue | 574 +++++++++++++++++++++- 2 files changed, 766 insertions(+), 16 deletions(-) diff --git a/client/dive-common/components/Sidebar.vue b/client/dive-common/components/Sidebar.vue index 6b5185ead..7642c1fdd 100644 --- a/client/dive-common/components/Sidebar.vue +++ b/client/dive-common/components/Sidebar.vue @@ -18,6 +18,7 @@ import { } from 'vue-media-annotator/provides'; import { clientSettings } from 'dive-common/store/settings'; +import ConfidenceFilter from 'dive-common/components/ConfidenceFilter.vue'; import TrackDetailsPanel from 'dive-common/components/TrackDetailsPanel.vue'; import TrackSettingsPanel from 'dive-common/components/TrackSettingsPanel.vue'; import TypeSettingsPanel from 'dive-common/components/TypeSettingsPanel.vue'; @@ -27,6 +28,7 @@ import { usePrompt } from 'dive-common/vue-utilities/prompt-service'; export default defineComponent({ components: { + ConfidenceFilter, StackedVirtualSidebarContainer, TrackDetailsPanel, TrackSettingsPanel, @@ -43,6 +45,10 @@ export default defineComponent({ type: Boolean, default: true, }, + horizontal: { + type: Boolean, + default: false, + }, isStereoDataset: { type: Boolean, default: false, @@ -67,7 +73,9 @@ export default defineComponent({ const styleManager = useTrackStyleManager(); const data = reactive({ - currentTab: 'tracks' as 'tracks' | 'attributes', + currentTab: 'tracks' as 'tracks' | 'attributes' | 'types', + // For horizontal mode, cycle through 3 tabs + horizontalTab: 'tracks' as 'tracks' | 'attributes' | 'types', }); function swapTabs() { @@ -78,6 +86,28 @@ export default defineComponent({ } } + function cycleHorizontalTabs() { + if (data.horizontalTab === 'tracks') { + data.horizontalTab = 'attributes'; + } else 
if (data.horizontalTab === 'attributes') { + data.horizontalTab = 'types'; + } else { + data.horizontalTab = 'tracks'; + } + } + + const horizontalTabIcon = computed(() => { + if (data.horizontalTab === 'tracks') return 'mdi-format-list-bulleted'; + if (data.horizontalTab === 'attributes') return 'mdi-card-text'; + return 'mdi-filter-variant'; + }); + + const horizontalTabTooltip = computed(() => { + if (data.horizontalTab === 'tracks') return 'Detection List (click to cycle)'; + if (data.horizontalTab === 'attributes') return 'Detection Details (click to cycle)'; + return 'Type Filters (click to cycle)'; + }); + function doToggleMerge() { if (toggleMerge().length) { data.currentTab = 'attributes'; @@ -125,17 +155,23 @@ export default defineComponent({ readOnlyMode, styleManager, disableAnnotationFilters: trackFilterControls.disableAnnotationFilters, + confidenceFilters: trackFilterControls.confidenceFilters, visible, + horizontalTabIcon, + horizontalTabTooltip, /* methods */ doToggleMerge, swapTabs, + cycleHorizontalTabs, }; }, }); + + +
+ + + {{ horizontalTabTooltip }} + + +
+ +
+ +
+ +
+ + + +
+
+ +
+ +
+ +
+ + + +
+
diff --git a/client/dive-common/components/Viewer.vue b/client/dive-common/components/Viewer.vue index 8bb3757a7..f2a2896b5 100644 --- a/client/dive-common/components/Viewer.vue +++ b/client/dive-common/components/Viewer.vue @@ -1,6 +1,6 @@