diff --git a/crates/client/src/search/index.rs b/crates/client/src/search/index.rs
index 8638ec20..c198bff4 100644
--- a/crates/client/src/search/index.rs
+++ b/crates/client/src/search/index.rs
@@ -16,6 +16,59 @@ use std::sync::Arc;
 
 use willow_identity::EndpointId;
 
+use crate::state::DisplayMessage;
+
+/// File extensions that classify a body / attachment as an image for
+/// the `has:image` operator. Mirrors the web UI's `IMAGE_EXTENSIONS`
+/// list in `crates/web/src/components/message.rs` — keep in sync if
+/// either one grows.
+const IMAGE_EXTENSIONS: &[&str] = &[
+    ".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg", ".bmp", ".ico",
+];
+
+/// Strip an inline-file body of the form `[file:NAME:BASE64]` and
+/// return `NAME`. Returns `None` for plain bodies.
+///
+/// The web UI sends inline file attachments via this body shape (see
+/// `crates/client/src/actions.rs::share_file_inline`), so the indexer
+/// classifies attachments by sniffing the body without needing a
+/// separate attachments field on `DisplayMessage`.
+fn inline_file_name(body: &str) -> Option<&str> {
+    let inner = body.strip_prefix("[file:")?.strip_suffix(']')?;
+    let colon = inner.find(':')?;
+    Some(&inner[..colon])
+}
+
+/// True if `name` ends with an image-file extension (case-insensitive).
+fn is_image_name(name: &str) -> bool {
+    let lower = name.to_lowercase();
+    IMAGE_EXTENSIONS.iter().any(|ext| lower.ends_with(ext))
+}
+
+/// True if `body` contains a URL whose path ends in an image
+/// extension. Cheap scan — proper URL parsing happens in the message
+/// row renderer; this mirror only needs to be precise enough for the
+/// `has:image` operator.
+fn body_has_image_url(body: &str) -> bool {
+    for prefix in ["http://", "https://"] {
+        let mut rest = body;
+        while let Some(idx) = rest.find(prefix) {
+            let url_start = idx;
+            let after = &rest[url_start..];
+            let url_end = after
+                .find(|c: char| c.is_whitespace() || c == '>' || c == ')' || c == ']')
+                .unwrap_or(after.len());
+            let url = &after[..url_end];
+            let path = url.split(['?', '#']).next().unwrap_or(url); // drop query and fragment
+            if is_image_name(path) {
+                return true;
+            }
+            rest = &after[url_end..];
+        }
+    }
+    false
+}
+
 /// One message ready to be indexed.
 ///
 /// All the metadata the executor needs to apply scope + operator
@@ -51,6 +104,58 @@ pub struct IndexableMessage {
     pub has_link: bool,
 }
 
+impl IndexableMessage {
+    /// Build an [`IndexableMessage`] from a [`DisplayMessage`], with
+    /// the operator-flag fields (`has_image`, `has_file`, `has_link`)
+    /// derived from the body so the search executor's `has:image` /
+    /// `has:file` / `has:link` filters actually match.
+    ///
+    /// Per `docs/specs/2026-04-19-ui-design/local-search.md` §Operators:
+    /// the index is the source of truth for `has:` filtering.
+    /// Classification rules:
+    ///
+    /// - Inline `[file:NAME:b64]` bodies (sent by
+    ///   `share_file_inline`) → `has_image` if `NAME` has an image
+    ///   extension, else `has_file`.
+    /// - Bare URLs whose path ends in an image extension →
+    ///   `has_image` (`https://example.com/cat.jpg`).
+    /// - Any `http://` or `https://` substring → `has_link`.
+    ///
+    /// `letter_id` is plumbed through verbatim — the caller resolves
+    /// it from the active letter context (or passes `None` for
+    /// grove-channel messages). See issue #355 for the missing
+    /// plumbing context.
+    pub fn from_display_message(
+        m: &DisplayMessage,
+        channel_name: &str,
+        grove_id: Option<String>,
+        letter_id: Option<String>,
+    ) -> Self {
+        let inline_name = inline_file_name(&m.body);
+        let has_inline_image = inline_name.is_some_and(is_image_name);
+        let has_inline_file = inline_name.is_some() && !has_inline_image;
+        let has_image = has_inline_image || body_has_image_url(&m.body);
+        let has_file = has_inline_file;
+        let has_link = m.body.contains("http://") || m.body.contains("https://");
+
+        Self {
+            message_id: m.id.clone(),
+            channel_id: m.channel_id.clone(),
+            channel_name: channel_name.to_string(),
+            grove_id,
+            letter_id,
+            author_peer_id: m.author_peer_id,
+            author_handle: m.author_display_name.to_lowercase(),
+            author_display_name: m.author_display_name.clone(),
+            timestamp_ms: m.timestamp_ms,
+            body: m.body.clone(),
+            has_image,
+            has_file,
+            has_link,
+        }
+    }
+}
+
 /// One row stored in the inverted index. Cheaply cloned into
 /// `SearchResult`s at execute time.
 #[derive(Debug, Clone, PartialEq, Eq)]
diff --git a/crates/client/src/search/tests.rs b/crates/client/src/search/tests.rs
index f24951dd..f1fa7432 100644
--- a/crates/client/src/search/tests.rs
+++ b/crates/client/src/search/tests.rs
@@ -940,3 +940,102 @@ mod query_tests {
         assert_eq!(q.raw, "Hello");
     }
 }
+
+mod from_display_message_tests {
+    //! `IndexableMessage::from_display_message` derives the operator
+    //! flags (`has_image`, `has_file`, `has_link`) from a
+    //! [`DisplayMessage`]. Per `docs/specs/2026-04-19-ui-design/local-search.md`
+    //! §Operators, the index must populate these so `has:image` /
+    //! `has:file` / `has:link` queries actually match — see issue
+    //! #355.
+    use super::super::index::IndexableMessage;
+    use crate::state::{DisplayMessage, QueueNote};
+    use std::collections::HashMap;
+    use willow_identity::Identity;
+
+    fn dm(id: &str, body: &str) -> DisplayMessage {
+        DisplayMessage {
+            id: id.into(),
+            channel_id: "c1".into(),
+            author_peer_id: Identity::generate().endpoint_id(),
+            author_display_name: "Mira".into(),
+            body: body.into(),
+            is_local: false,
+            timestamp_ms: 100,
+            reactions: HashMap::new(),
+            edited: false,
+            deleted: false,
+            reply_to: None,
+            reply_preview: None,
+            mentions: Vec::new(),
+            pinned: false,
+            whisper: false,
+            queue_note: QueueNote::None,
+        }
+    }
+
+    #[test]
+    fn inline_image_attachment_sets_has_image() {
+        // `[file:NAME:b64]` where NAME has an image extension renders
+        // inline as an image embed in the web UI; the index must
+        // mirror that classification so `has:image` matches.
+        let body = format!(
+            "[file:photo.png:{}]",
+            crate::base64::encode(b"\x89PNG\r\n\x1a\n")
+        );
+        let m = dm("m1", &body);
+        let ix = IndexableMessage::from_display_message(&m, "general", None, None);
+        assert!(ix.has_image, "image attachment must set has_image");
+        assert!(!ix.has_file, "image attachment must not set has_file");
+    }
+
+    #[test]
+    fn inline_non_image_attachment_sets_has_file() {
+        let body = format!("[file:notes.txt:{}]", crate::base64::encode(b"hello"));
+        let m = dm("m2", &body);
+        let ix = IndexableMessage::from_display_message(&m, "general", None, None);
+        assert!(ix.has_file, "non-image attachment must set has_file");
+        assert!(!ix.has_image, "non-image attachment must not set has_image");
+    }
+
+    #[test]
+    fn image_url_in_body_sets_has_image() {
+        // Bare URL pointing at an image extension also lights up
+        // `has:image` — mirrors the UI's `is_image_url` rule.
+        let m = dm("m3", "look at https://example.com/cat.jpg");
+        let ix = IndexableMessage::from_display_message(&m, "general", None, None);
+        assert!(ix.has_image, "image URL must set has_image");
+        assert!(ix.has_link, "URL must set has_link");
+    }
+
+    #[test]
+    fn plain_url_sets_has_link_only() {
+        let m = dm("m4", "see https://willow.im/docs");
+        let ix = IndexableMessage::from_display_message(&m, "general", None, None);
+        assert!(ix.has_link);
+        assert!(!ix.has_image);
+        assert!(!ix.has_file);
+    }
+
+    #[test]
+    fn plain_text_sets_no_flags() {
+        let m = dm("m5", "hello world");
+        let ix = IndexableMessage::from_display_message(&m, "general", None, None);
+        assert!(!ix.has_image);
+        assert!(!ix.has_file);
+        assert!(!ix.has_link);
+    }
+
+    #[test]
+    fn grove_and_letter_id_passed_through() {
+        let m = dm("m6", "hi");
+        let ix = IndexableMessage::from_display_message(
+            &m,
+            "general",
+            Some("g0".into()),
+            Some("L1".into()),
+        );
+        assert_eq!(ix.grove_id.as_deref(), Some("g0"));
+        assert_eq!(ix.letter_id.as_deref(), Some("L1"));
+    }
+}
diff --git a/crates/web/src/app.rs b/crates/web/src/app.rs
index 06c15bb7..e8accfa2 100644
--- a/crates/web/src/app.rs
+++ b/crates/web/src/app.rs
@@ -379,33 +379,26 @@ pub fn App() -> impl IntoView {
         let grove_id = active_server_sig.get();
         let local_peer = peer_id_sig.get();
 
+        // `IndexableMessage::from_display_message` derives the
+        // `has_image` / `has_file` / `has_link` operator flags
+        // from the body so `has:image` / `has:file` / `has:link`
+        // queries actually match. `letter_id` stays `None` here —
+        // the active-letter signal isn't yet wired into this
+        // effect; tracked as a follow-up to issue #355.
+        let grove = if grove_id.is_empty() {
+            None
+        } else {
+            Some(grove_id.clone())
+        };
         let indexable: Vec<willow_client::IndexableMessage> = msgs
             .into_iter()
             .map(|m| {
-                let author_peer_id = m.author_peer_id;
-                // Lightweight link detection — `has:link` operator
-                // key. Proper URL parsing lives in message-row
-                // rendering; this is the cheap version.
-                let has_link = m.body.contains("http://") || m.body.contains("https://");
-                willow_client::IndexableMessage {
-                    message_id: m.id,
-                    channel_id: m.channel_id.clone(),
-                    channel_name: current_ch.clone(),
-                    grove_id: if grove_id.is_empty() {
-                        None
-                    } else {
-                        Some(grove_id.clone())
-                    },
-                    letter_id: None,
-                    author_peer_id,
-                    author_handle: m.author_display_name.to_lowercase(),
-                    author_display_name: m.author_display_name,
-                    timestamp_ms: m.timestamp_ms,
-                    body: m.body,
-                    has_image: false,
-                    has_file: false,
-                    has_link,
-                }
+                willow_client::IndexableMessage::from_display_message(
+                    &m,
+                    &current_ch,
+                    grove.clone(),
+                    None,
+                )
             })
             .collect();
         // Ignore local peer binding to avoid unused_variables clippy