diff --git a/crates/client/src/lib.rs b/crates/client/src/lib.rs index f523e0e0..002c634b 100644 --- a/crates/client/src/lib.rs +++ b/crates/client/src/lib.rs @@ -79,8 +79,8 @@ pub use nickname::{MemNicknameStore, NicknameStore, NicknameStoreHandle, NICKNAM pub use ops::{pack_wire, unpack_wire, VoiceSignalPayload, WireMessage}; pub use queue::{ArrivedSummary, QueueSummary, RelayStatus}; pub use search::{ - IndexableMessage, RecentQuery, SearchIndex, SearchIndexBuildStatus, SearchIndexConfig, - SearchIndexHandle, SearchQuery, SearchResult, SearchScope, + derive_has_image_file, IndexableMessage, RecentQuery, SearchIndex, SearchIndexBuildStatus, + SearchIndexConfig, SearchIndexHandle, SearchQuery, SearchResult, SearchScope, }; pub use trust::{ ComparePreview, InMemoryTrustStore, PeerTrust, TrustStore, TrustStoreHandle, UnverifiedReason, diff --git a/crates/client/src/search/derive.rs b/crates/client/src/search/derive.rs new file mode 100644 index 00000000..c2b6edc1 --- /dev/null +++ b/crates/client/src/search/derive.rs @@ -0,0 +1,168 @@ +//! Body-string derivation helpers for [`super::IndexableMessage`]. +//! +//! `DisplayMessage` doesn't carry an `attachments` field — files are +//! inlined into the message body as `[file::]` by +//! [`crate::ClientHandle::share_file_inline`]. The search index needs +//! booleans (`has_image`, `has_file`) to power the `has:image` / +//! `has:file` operators (see `local-search.md` §Operator filters), so +//! the body string is the source of truth. +//! +//! This module concentrates that derivation in one pure function so +//! the indexer call site in `crates/web/src/app.rs` doesn't have to +//! re-encode the inline-file format, and so a future native consumer +//! (agent / CLI) gets the same answers. +//! +//! Image vs file distinction follows the rendering rule in +//! `crates/web/src/components/message.rs`: an inline file with an +//! image extension renders as an inline embed (so `has:image`); any +//! other inline file renders as a download card (so `has:file`). A +//! body that carries no inline file but contains an image URL still +//! produces an inline embed in the message renderer, so `has:image` +//! also fires for that case. URLs to non-image files don't produce a +//! visible attachment, so `has:file` is reserved for the inline-file +//! payload format only. + +/// Filename extensions the message renderer treats as inline-image +/// embeds. Kept in sync with `IMAGE_EXTENSIONS` in +/// `crates/web/src/components/mod.rs` (which scopes URL-driven embeds +/// to the same set). +const IMAGE_EXTENSIONS: &[&str] = &[ + ".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg", ".bmp", ".ico", +]; + +/// Derive `(has_image, has_file)` for an `IndexableMessage` from the +/// already-decrypted body string. +/// +/// Rules (mirrors message-renderer logic): +/// - body starts with `[file:` and ends with `]` with at least one +/// inner `:` separator → inline file payload. If the filename has an +/// image extension → `has_image`; otherwise → `has_file`. +/// - body contains an `http(s)://…` URL → `has_image` +/// (auto-embed path in the renderer). +/// - otherwise both false. +pub fn derive_has_image_file(body: &str) -> (bool, bool) { + if let Some(filename) = inline_file_filename(body) { + let is_image = is_image_filename(&filename); + return (is_image, !is_image); + } + let has_image = body_contains_image_url(body); + (has_image, false) +} + +/// Extract the filename from a body that matches the +/// `[file::]` inline-file format. Returns `None` for +/// any other body shape (including malformed prefixes / missing inner +/// colon). +fn inline_file_filename(body: &str) -> Option { + let inner = body.strip_prefix("[file:")?.strip_suffix(']')?; + let colon = inner.find(':')?; + Some(inner[..colon].to_string()) +} + +fn is_image_filename(filename: &str) -> bool { + let lower = filename.to_lowercase(); + IMAGE_EXTENSIONS.iter().any(|ext| lower.ends_with(ext)) +} + +/// Cheap scan: does `body` contain a `http://` or `https://` URL whose +/// path ends in a known image extension? Mirrors the URL stage in +/// `crates/web/src/components/message.rs::extract_urls` without +/// pulling in the full segment splitter — the indexer only needs the +/// boolean. +fn body_contains_image_url(body: &str) -> bool { + let mut idx = 0; + while idx < body.len() { + let rest = &body[idx..]; + let Some(start) = rest + .find("https://") + .map(|i| (i, "https://".len())) + .or_else(|| rest.find("http://").map(|i| (i, "http://".len()))) + .map(|(i, _)| i) + else { + return false; + }; + let abs_start = idx + start; + let url_rest = &body[abs_start..]; + let url_end = url_rest + .find(|c: char| c.is_whitespace() || c == '>' || c == ')' || c == ']') + .map(|i| abs_start + i) + .unwrap_or(body.len()); + let url = &body[abs_start..url_end]; + // Strip query/fragment before extension check — `?` / `#` are + // not whitespace so the URL extractor keeps them in `url`. + let path = url.split(['?', '#']).next().unwrap_or(url); + let lower = path.to_lowercase(); + if IMAGE_EXTENSIONS.iter().any(|ext| lower.ends_with(ext)) { + return true; + } + idx = url_end; + } + false +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn plain_text_body_yields_no_attachments() { + assert_eq!(derive_has_image_file("hello world"), (false, false)); + assert_eq!(derive_has_image_file(""), (false, false)); + } + + #[test] + fn inline_image_file_sets_has_image_only() { + let body = "[file:photo.png:Zm9vYmFy]"; + assert_eq!(derive_has_image_file(body), (true, false)); + } + + #[test] + fn inline_image_file_uppercase_extension() { + let body = "[file:Photo.JPG:Zm9v]"; + assert_eq!(derive_has_image_file(body), (true, false)); + } + + #[test] + fn inline_non_image_file_sets_has_file_only() { + let body = "[file:notes.txt:Zm9vYmFy]"; + assert_eq!(derive_has_image_file(body), (false, true)); + } + + #[test] + fn inline_pdf_is_a_file_not_image() { + let body = "[file:report.pdf:Zm9v]"; + assert_eq!(derive_has_image_file(body), (false, true)); + } + + #[test] + fn malformed_inline_file_falls_through_to_url_check() { + // No inner colon — not the inline-file shape. + assert_eq!(derive_has_image_file("[file:nodatahere]"), (false, false)); + // Missing closing bracket. + assert_eq!(derive_has_image_file("[file:foo.png:abc"), (false, false)); + } + + #[test] + fn body_with_image_url_sets_has_image() { + let body = "look at this https://example.com/cat.png cute"; + assert_eq!(derive_has_image_file(body), (true, false)); + } + + #[test] + fn body_with_image_url_query_string() { + let body = "https://cdn.example.com/cat.jpg?w=400"; + assert_eq!(derive_has_image_file(body), (true, false)); + } + + #[test] + fn body_with_non_image_url_no_attachment_flags() { + let body = "https://example.com/article"; + assert_eq!(derive_has_image_file(body), (false, false)); + } + + #[test] + fn body_with_multiple_urls_finds_image() { + let body = "https://a.com/x https://b.com/photo.gif"; + assert_eq!(derive_has_image_file(body), (true, false)); + } +} diff --git a/crates/client/src/search/mod.rs b/crates/client/src/search/mod.rs index 5e6c5539..f5caf3bf 100644 --- a/crates/client/src/search/mod.rs +++ b/crates/client/src/search/mod.rs @@ -11,6 +11,7 @@ pub mod actor; pub mod config; +pub mod derive; pub mod execute; pub mod handle; pub mod highlight; @@ -25,6 +26,7 @@ mod tests; pub use config::{ clear_all_recents, forget_recent, push_recent, RecentQuery, SearchIndexConfig, MAX_RECENTS, }; +pub use derive::derive_has_image_file; pub use execute::{execute, SearchResult, SearchScope}; pub use handle::SearchIndexHandle; pub use highlight::{build_excerpt, match_ranges, Excerpt}; diff --git a/crates/client/src/search/tests.rs b/crates/client/src/search/tests.rs index f24951dd..b247b3b9 100644 --- a/crates/client/src/search/tests.rs +++ b/crates/client/src/search/tests.rs @@ -536,6 +536,57 @@ mod execute_tests { assert_eq!(hits[0].message_id, "m3"); } + /// Regression for #355 — the indexer call site in + /// `crates/web/src/app.rs` previously hard-coded + /// `has_image: false` / `has_file: false`, so the documented + /// `has:image` / `has:file` operators silently returned zero hits. + /// This test wires the body strings produced by + /// `ClientHandle::share_file_inline` (and an image URL) through + /// `derive_has_image_file` and asserts the executor returns the + /// expected message ids. + #[test] + fn derived_attachment_flags_drive_has_image_and_has_file_operators() { + use super::super::derive::derive_has_image_file; + let mut idx = SearchIndex::new(); + let bodies = [ + ("m_text", "just talking, no attachments"), + ("m_inline_png", "[file:cat.png:Zm9v]"), + ("m_inline_pdf", "[file:report.pdf:Zm9v]"), + ("m_image_url", "look https://cdn.example.com/cat.jpg yo"), + ]; + for (id, body) in bodies { + let (img, file) = derive_has_image_file(body); + // `has_link` is left to the existing call-site rule; + // irrelevant to this regression. + let link = body.contains("http://") || body.contains("https://"); + idx.insert(mk( + id, + body, + "c1", + "general", + 100, + "Mira", + "mira", + Some("g0"), + None, + img, + file, + link, + )); + } + + let img_q = parse_query("has:image"); + let img_hits = execute(&idx, &img_q, &SearchScope::AllGrovesAndLetters); + let mut img_ids: Vec<&str> = img_hits.iter().map(|h| h.message_id.as_str()).collect(); + img_ids.sort(); + assert_eq!(img_ids, vec!["m_image_url", "m_inline_png"]); + + let file_q = parse_query("has:file"); + let file_hits = execute(&idx, &file_q, &SearchScope::AllGrovesAndLetters); + let file_ids: Vec<&str> = file_hits.iter().map(|h| h.message_id.as_str()).collect(); + assert_eq!(file_ids, vec!["m_inline_pdf"]); + } + #[test] fn results_ordered_desc_by_timestamp() { let idx = seed_index(); diff --git a/crates/web/src/app.rs b/crates/web/src/app.rs index 27064f0e..ef9d44d7 100644 --- a/crates/web/src/app.rs +++ b/crates/web/src/app.rs @@ -378,6 +378,15 @@ pub fn App() -> impl IntoView { // key. Proper URL parsing lives in message-row // rendering; this is the cheap version. let has_link = m.body.contains("http://") || m.body.contains("https://"); + // `has:image` / `has:file` derive from the body + // string (inline `[file:name:b64]` payload + + // image-URL embeds) — `DisplayMessage` doesn't + // carry a separate attachments field. See + // `willow_client::search::derive`. `letter_id` + // stays `None` until the active-letter signal + // lands alongside `letters-dms.md` (issue #355 + // follow-up). + let (has_image, has_file) = willow_client::derive_has_image_file(&m.body); willow_client::IndexableMessage { message_id: m.id, channel_id: m.channel_id.clone(), @@ -393,8 +402,8 @@ pub fn App() -> impl IntoView { author_display_name: m.author_display_name, timestamp_ms: m.timestamp_ms, body: m.body, - has_image: false, - has_file: false, + has_image, + has_file, has_link, } })