Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions crates/client/src/search/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,59 @@ use std::sync::Arc;

use willow_identity::EndpointId;

use crate::state::DisplayMessage;

/// File extensions that classify a body / attachment as an image for
/// the `has:image` operator. Mirrors the web UI's `IMAGE_EXTENSIONS`
/// list in `crates/web/src/components/message.rs` — keep in sync if
/// either one grows.
///
/// Entries are written in lowercase with their leading dot.
/// `is_image_name` lowercases the candidate name and then does a plain
/// `ends_with` test against each entry, so an entry containing
/// uppercase letters would never match.
const IMAGE_EXTENSIONS: &[&str] = &[
".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg", ".bmp", ".ico",
];

/// Extract `NAME` from an inline-file body shaped `[file:NAME:BASE64]`.
///
/// The whole body must start with `[file:` and end with `]`; `NAME`
/// is everything up to the first `:` after the prefix. Any other body
/// (plain text, missing closing bracket, no `:` separating name from
/// payload) yields `None`.
///
/// The web UI sends inline file attachments via this body shape (see
/// `crates/client/src/actions.rs::share_file_inline`), so the indexer
/// can classify attachments by sniffing the body instead of needing a
/// dedicated attachments field on `DisplayMessage`.
fn inline_file_name(body: &str) -> Option<&str> {
    body.strip_prefix("[file:")
        .and_then(|rest| rest.strip_suffix(']'))
        // `split_once` cuts at the first colon, i.e. the NAME/BASE64 boundary.
        .and_then(|payload| payload.split_once(':'))
        .map(|(name, _encoded)| name)
}

/// Report whether `name` ends with one of the `IMAGE_EXTENSIONS`.
///
/// The comparison runs against a lowercased copy of `name`, so
/// `PHOTO.PNG` and `photo.png` are treated alike.
fn is_image_name(name: &str) -> bool {
    let folded = name.to_lowercase();
    for &ext in IMAGE_EXTENSIONS {
        if folded.ends_with(ext) {
            return true;
        }
    }
    false
}

/// True if `body` contains an `http://` or `https://` URL whose path
/// ends in an image extension.
///
/// Cheap scan — proper URL parsing happens in the message row
/// renderer; this mirror only needs to be precise enough for the
/// `has:image` operator. A URL runs from its scheme up to the next
/// whitespace, `>`, `)` or `]` (or the end of the body). Both the
/// `?query` and the `#fragment` are dropped before the extension
/// check, so `https://host/cat.png?w=200` and
/// `https://host/cat.png#preview` both count as image URLs.
///
/// NOTE(review): the fragment stripping is new here — confirm the web
/// UI's image-URL rule treats `#fragment` the same way.
fn body_has_image_url(body: &str) -> bool {
    ["http://", "https://"].iter().any(|scheme| {
        let mut rest = body;
        while let Some(start) = rest.find(scheme) {
            let candidate = &rest[start..];
            let end = candidate
                .find(|c: char| c.is_whitespace() || matches!(c, '>' | ')' | ']'))
                .unwrap_or(candidate.len());
            let url = &candidate[..end];
            // The path stops at whichever of `?` / `#` comes first.
            let path = url.split(['?', '#']).next().unwrap_or(url);
            if is_image_name(path) {
                return true;
            }
            // Resume after this URL; `end` is at least the scheme length,
            // so the scan always advances.
            rest = &candidate[end..];
        }
        false
    })
}

/// One message ready to be indexed.
///
/// All the metadata the executor needs to apply scope + operator
Expand Down Expand Up @@ -51,6 +104,58 @@ pub struct IndexableMessage {
pub has_link: bool,
}

impl IndexableMessage {
/// Build an [`IndexableMessage`] from a [`DisplayMessage`], with
/// the operator-flag fields (`has_image`, `has_file`, `has_link`)
/// derived from the body so the search executor's `has:image` /
/// `has:file` / `has:link` filters actually match.
///
/// Per `docs/specs/2026-04-19-ui-design/local-search.md` §Operators:
/// the index is the source of truth for `has:` filtering.
/// Classification rules:
///
/// - Inline `[file:NAME:b64]` bodies (sent by
/// `share_file_inline`) → `has_image` if `NAME` has an image
/// extension, else `has_file`.
/// - Bare URLs whose path ends in an image extension →
/// `has_image` (`https://example.com/cat.jpg`).
/// - Any `http://` or `https://` substring → `has_link`.
///
/// `letter_id` is plumbed through verbatim — the caller resolves
/// it from the active letter context (or passes `None` for
/// grove-channel messages). See issue #355 for the missing
/// plumbing context.
pub fn from_display_message(
m: &DisplayMessage,
channel_name: &str,
grove_id: Option<String>,
letter_id: Option<String>,
) -> Self {
let inline_name = inline_file_name(&m.body);
let has_inline_image = inline_name.is_some_and(is_image_name);
let has_inline_file = inline_name.is_some() && !has_inline_image;
let has_image = has_inline_image || body_has_image_url(&m.body);
let has_file = has_inline_file;
let has_link = m.body.contains("http://") || m.body.contains("https://");

Self {
message_id: m.id.clone(),
channel_id: m.channel_id.clone(),
channel_name: channel_name.to_string(),
grove_id,
letter_id,
author_peer_id: m.author_peer_id,
author_handle: m.author_display_name.to_lowercase(),
author_display_name: m.author_display_name.clone(),
timestamp_ms: m.timestamp_ms,
body: m.body.clone(),
has_image,
has_file,
has_link,
}
}
}

/// One row stored in the inverted index. Cheaply cloned into
/// `SearchResult`s at execute time.
#[derive(Debug, Clone, PartialEq, Eq)]
Expand Down
99 changes: 99 additions & 0 deletions crates/client/src/search/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -940,3 +940,102 @@ mod query_tests {
assert_eq!(q.raw, "Hello");
}
}

mod from_display_message_tests {
    //! Tests for `IndexableMessage::from_display_message`, which
    //! derives the operator flags (`has_image`, `has_file`,
    //! `has_link`) from a [`DisplayMessage`] body. Per
    //! `docs/specs/2026-04-19-ui-design/local-search.md` §Operators the
    //! index must populate these so `has:image` / `has:file` /
    //! `has:link` queries actually match — see issue #355.
    use super::super::index::IndexableMessage;
    use crate::state::{DisplayMessage, QueueNote};
    use std::collections::HashMap;
    use willow_identity::Identity;

    /// Fixture: a non-local message in channel `c1` authored by
    /// "Mira" with a freshly generated identity; only `id` and `body`
    /// vary between tests.
    fn dm(id: &str, body: &str) -> DisplayMessage {
        let author_peer_id = Identity::generate().endpoint_id();
        DisplayMessage {
            id: id.into(),
            channel_id: "c1".into(),
            author_peer_id,
            author_display_name: "Mira".into(),
            body: body.into(),
            is_local: false,
            timestamp_ms: 100,
            reactions: HashMap::new(),
            edited: false,
            deleted: false,
            reply_to: None,
            reply_preview: None,
            mentions: Vec::new(),
            pinned: false,
            whisper: false,
            queue_note: QueueNote::None,
        }
    }

    /// Index `message` as a `general` channel message with no grove
    /// and no letter context.
    fn index_in_general(message: &DisplayMessage) -> IndexableMessage {
        IndexableMessage::from_display_message(message, "general", None, None)
    }

    #[test]
    fn inline_image_attachment_sets_has_image() {
        // A `[file:NAME:b64]` body whose NAME has an image extension is
        // rendered as an inline image embed by the web UI; the index
        // has to classify it the same way for `has:image` to match.
        let encoded = crate::base64::encode(b"\x89PNG\r\n\x1a\n");
        let body = format!("[file:photo.png:{encoded}]");
        let indexed = index_in_general(&dm("m1", &body));
        assert!(indexed.has_image, "image attachment must set has_image");
        assert!(!indexed.has_file, "image attachment must not set has_file");
    }

    #[test]
    fn inline_non_image_attachment_sets_has_file() {
        let encoded = crate::base64::encode(b"hello");
        let body = format!("[file:notes.txt:{encoded}]");
        let indexed = index_in_general(&dm("m2", &body));
        assert!(indexed.has_file, "non-image attachment must set has_file");
        assert!(
            !indexed.has_image,
            "non-image attachment must not set has_image"
        );
    }

    #[test]
    fn image_url_in_body_sets_has_image() {
        // A bare URL ending in an image extension also counts for
        // `has:image` — mirrors the UI's `is_image_url` rule.
        let indexed = index_in_general(&dm("m3", "look at https://example.com/cat.jpg"));
        assert!(indexed.has_image, "image URL must set has_image");
        assert!(indexed.has_link, "URL must set has_link");
    }

    #[test]
    fn plain_url_sets_has_link_only() {
        let indexed = index_in_general(&dm("m4", "see https://willow.im/docs"));
        assert_eq!(
            (indexed.has_link, indexed.has_image, indexed.has_file),
            (true, false, false)
        );
    }

    #[test]
    fn plain_text_sets_no_flags() {
        let indexed = index_in_general(&dm("m5", "hello world"));
        assert_eq!(
            (indexed.has_image, indexed.has_file, indexed.has_link),
            (false, false, false)
        );
    }

    #[test]
    fn grove_and_letter_id_passed_through() {
        let message = dm("m6", "hi");
        let grove = Some(String::from("g0"));
        let letter = Some(String::from("L1"));
        let indexed = IndexableMessage::from_display_message(&message, "general", grove, letter);
        assert_eq!(indexed.grove_id.as_deref(), Some("g0"));
        assert_eq!(indexed.letter_id.as_deref(), Some("L1"));
    }
}
41 changes: 17 additions & 24 deletions crates/web/src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -379,33 +379,26 @@ pub fn App() -> impl IntoView {
let grove_id = active_server_sig.get();
let local_peer = peer_id_sig.get();

// `IndexableMessage::from_display_message` derives the
// `has_image` / `has_file` / `has_link` operator flags
// from the body so `has:image` / `has:file` / `has:link`
// queries actually match. `letter_id` stays `None` here —
// the active-letter signal isn't yet wired into this
// effect; tracked as a follow-up to issue #355.
let grove = if grove_id.is_empty() {
None
} else {
Some(grove_id.clone())
};
let indexable: Vec<willow_client::IndexableMessage> = msgs
.into_iter()
.map(|m| {
let author_peer_id = m.author_peer_id;
// Lightweight link detection — `has:link` operator
// key. Proper URL parsing lives in message-row
// rendering; this is the cheap version.
let has_link = m.body.contains("http://") || m.body.contains("https://");
willow_client::IndexableMessage {
message_id: m.id,
channel_id: m.channel_id.clone(),
channel_name: current_ch.clone(),
grove_id: if grove_id.is_empty() {
None
} else {
Some(grove_id.clone())
},
letter_id: None,
author_peer_id,
author_handle: m.author_display_name.to_lowercase(),
author_display_name: m.author_display_name,
timestamp_ms: m.timestamp_ms,
body: m.body,
has_image: false,
has_file: false,
has_link,
}
willow_client::IndexableMessage::from_display_message(
&m,
&current_ch,
grove.clone(),
None,
)
})
.collect();
// Ignore local peer binding to avoid unused_variables clippy
Expand Down