From fa369292bde2314dccd1eb1b0cf297c0871e9062 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Tue, 6 Jan 2026 18:26:45 +0800 Subject: [PATCH 1/2] Add boolean match plan scaffold --- rust/lance-index/src/scalar/inverted/query.rs | 152 ++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/rust/lance-index/src/scalar/inverted/query.rs b/rust/lance-index/src/scalar/inverted/query.rs index ad05aab29b5..44d762fcc85 100644 --- a/rust/lance-index/src/scalar/inverted/query.rs +++ b/rust/lance-index/src/scalar/inverted/query.rs @@ -635,6 +635,87 @@ impl BooleanQuery { } } +#[derive(Debug, Clone, PartialEq)] +#[allow(dead_code)] +pub(crate) struct BooleanMatchPlan { + pub column: String, + pub should: Vec<MatchQuery>, + pub must: Vec<MatchQuery>, + pub must_not: Vec<MatchQuery>, +} + +#[allow(dead_code)] +impl BooleanMatchPlan { + pub(crate) fn try_build(query: &FtsQuery) -> Option<Self> { + match query { + FtsQuery::Match(match_query) => { + let mut column = None; + let mut should = Vec::new(); + Self::push_match(&mut should, &mut column, match_query)?; + Some(Self { + column: column?, + should, + must: Vec::new(), + must_not: Vec::new(), + }) + } + FtsQuery::Boolean(bool_query) => { + let mut column = None; + let mut should = Vec::with_capacity(bool_query.should.len()); + let mut must = Vec::with_capacity(bool_query.must.len()); + let mut must_not = Vec::with_capacity(bool_query.must_not.len()); + + for query in &bool_query.should { + let FtsQuery::Match(match_query) = query else { + return None; + }; + Self::push_match(&mut should, &mut column, match_query)?; + } + for query in &bool_query.must { + let FtsQuery::Match(match_query) = query else { + return None; + }; + Self::push_match(&mut must, &mut column, match_query)?; + } + for query in &bool_query.must_not { + let FtsQuery::Match(match_query) = query else { + return None; + }; + Self::push_match(&mut must_not, &mut column, match_query)?; + } + + if should.is_empty() && must.is_empty() { + return None; + } + Some(Self { + column: column?, + should, 
+ must, + must_not, + }) + } + _ => None, + } + } + + fn push_match( + dest: &mut Vec<MatchQuery>, + column: &mut Option<String>, + query: &MatchQuery, + ) -> Option<()> { + let query_column = query.column.as_ref()?; + if let Some(existing) = column.as_ref() { + if existing != query_column { + return None; + } + } else { + *column = Some(query_column.clone()); + } + dest.push(query.clone()); + Some(()) + } +} + impl FtsQueryNode for BooleanQuery { fn columns(&self) -> HashSet<String> { let mut columns = HashSet::new(); @@ -909,4 +990,75 @@ mod tests { let query: PhraseQuery = serde_json::from_value(query).unwrap(); assert_eq!(query, expected); } + + #[test] + fn test_boolean_match_plan_match_query() { + use super::*; + + let query = MatchQuery::new("hello".to_string()).with_column(Some("text".to_string())); + let plan = BooleanMatchPlan::try_build(&FtsQuery::Match(query.clone())).unwrap(); + assert_eq!(plan.column, "text"); + assert_eq!(plan.should, vec![query]); + assert!(plan.must.is_empty()); + assert!(plan.must_not.is_empty()); + } + + #[test] + fn test_boolean_match_plan_boolean_query() { + use super::*; + + let should = MatchQuery::new("a".to_string()).with_column(Some("text".to_string())); + let must = MatchQuery::new("b".to_string()).with_column(Some("text".to_string())); + let must_not = MatchQuery::new("c".to_string()).with_column(Some("text".to_string())); + let query = BooleanQuery::new(vec![ + (Occur::Should, should.clone().into()), + (Occur::Must, must.clone().into()), + (Occur::MustNot, must_not.clone().into()), + ]); + let plan = BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).unwrap(); + assert_eq!(plan.column, "text"); + assert_eq!(plan.should, vec![should]); + assert_eq!(plan.must, vec![must]); + assert_eq!(plan.must_not, vec![must_not]); + } + + #[test] + fn test_boolean_match_plan_rejects_mixed_columns() { + use super::*; + + let should = MatchQuery::new("a".to_string()).with_column(Some("text".to_string())); + let must = 
MatchQuery::new("b".to_string()).with_column(Some("title".to_string())); + let query = BooleanQuery::new(vec![ + (Occur::Should, should.into()), + (Occur::Must, must.into()), + ]); + assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none()); + } + + #[test] + fn test_boolean_match_plan_rejects_non_match_queries() { + use super::*; + + let phrase = + PhraseQuery::new("hello world".to_string()).with_column(Some("text".to_string())); + let query = BooleanQuery::new(vec![(Occur::Should, phrase.into())]); + assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none()); + } + + #[test] + fn test_boolean_match_plan_rejects_only_must_not() { + use super::*; + + let must_not = MatchQuery::new("c".to_string()).with_column(Some("text".to_string())); + let query = BooleanQuery::new(vec![(Occur::MustNot, must_not.into())]); + assert!(BooleanMatchPlan::try_build(&FtsQuery::Boolean(query)).is_none()); + } + + #[test] + fn test_boolean_match_plan_rejects_missing_column() { + use super::*; + + let query = MatchQuery::new("hello".to_string()); + assert!(BooleanMatchPlan::try_build(&FtsQuery::Match(query)).is_none()); + } } From 8fda02aeb6e9b73eb3cca4b80bf97819f1341bdd Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Tue, 6 Jan 2026 18:35:44 +0800 Subject: [PATCH 2/2] Refactor boolean plan building --- rust/lance-index/src/scalar/inverted/query.rs | 39 ++++++++----------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/rust/lance-index/src/scalar/inverted/query.rs b/rust/lance-index/src/scalar/inverted/query.rs index 44d762fcc85..5ad00aade30 100644 --- a/rust/lance-index/src/scalar/inverted/query.rs +++ b/rust/lance-index/src/scalar/inverted/query.rs @@ -661,28 +661,9 @@ impl BooleanMatchPlan { } FtsQuery::Boolean(bool_query) => { let mut column = None; - let mut should = Vec::with_capacity(bool_query.should.len()); - let mut must = Vec::with_capacity(bool_query.must.len()); - let mut must_not = Vec::with_capacity(bool_query.must_not.len()); - 
- - for query in &bool_query.should { - let FtsQuery::Match(match_query) = query else { - return None; - }; - Self::push_match(&mut should, &mut column, match_query)?; - } - for query in &bool_query.must { - let FtsQuery::Match(match_query) = query else { - return None; - }; - Self::push_match(&mut must, &mut column, match_query)?; - } - for query in &bool_query.must_not { - let FtsQuery::Match(match_query) = query else { - return None; - }; - Self::push_match(&mut must_not, &mut column, match_query)?; - } + let should = Self::collect_matches(&bool_query.should, &mut column)?; + let must = Self::collect_matches(&bool_query.must, &mut column)?; + let must_not = Self::collect_matches(&bool_query.must_not, &mut column)?; if should.is_empty() && must.is_empty() { return None; @@ -714,6 +695,20 @@ impl BooleanMatchPlan { dest.push(query.clone()); Some(()) } + + fn collect_matches( + queries: &[FtsQuery], + column: &mut Option<String>, + ) -> Option<Vec<MatchQuery>> { + let mut matches = Vec::with_capacity(queries.len()); + for query in queries { + let FtsQuery::Match(match_query) = query else { + return None; + }; + Self::push_match(&mut matches, column, match_query)?; + } + Some(matches) + } } impl FtsQueryNode for BooleanQuery {