Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ pub struct TermQueryParams {
case_insensitive: bool,
}

#[cfg(test)]
pub fn term_query_from_field_value(field: impl ToString, value: impl ToString) -> TermQuery {
TermQuery {
field: field.to_string(),
Expand Down
23 changes: 11 additions & 12 deletions quickwit/quickwit-query/src/elastic_query_dsl/terms_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::{BTreeSet, HashMap};

use serde::Deserialize;

use crate::elastic_query_dsl::bool_query::BoolQuery;
use crate::elastic_query_dsl::one_field_map::OneFieldMap;
use crate::elastic_query_dsl::term_query::term_query_from_field_value;
use crate::elastic_query_dsl::{ConvertibleToQueryAst, ElasticQueryDslInner};
use crate::not_nan_f32::NotNaNf32;
use crate::query_ast::QueryAst;
use crate::query_ast::{QueryAst, TermSetQuery};

#[derive(PartialEq, Eq, Debug, Deserialize, Clone)]
#[serde(try_from = "TermsQueryForSerialization")]
Expand Down Expand Up @@ -87,15 +87,14 @@ impl TryFrom<TermsQueryForSerialization> for TermsQuery {

impl ConvertibleToQueryAst for TermsQuery {
fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> {
let term_queries: Vec<ElasticQueryDslInner> = self
.values
.into_iter()
.map(|value| term_query_from_field_value(self.field.clone(), value))
.map(ElasticQueryDslInner::from)
.collect();
let mut union = BoolQuery::union(term_queries);
union.boost = self.boost;
union.convert_to_query_ast()
let mut terms_per_field = HashMap::new();
let values_set: BTreeSet<String> = self.values.into_iter().collect();
terms_per_field.insert(self.field, values_set);

let term_set_query = TermSetQuery { terms_per_field };
let query_ast: QueryAst = term_set_query.into();

Ok(query_ast.boost(self.boost))
}
}

Expand Down
121 changes: 117 additions & 4 deletions quickwit/quickwit-query/src/query_ast/term_set_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use tantivy::Term;

use crate::InvalidQuery;
use crate::query_ast::{
BuildTantivyAst, BuildTantivyAstContext, QueryAst, TantivyQueryAst, TermQuery,
BoolQuery, BuildTantivyAst, BuildTantivyAstContext, QueryAst, TantivyQueryAst, TermQuery,
};

/// TermSetQuery matches the same document set as if it was a union of
Expand All @@ -32,6 +32,53 @@ pub struct TermSetQuery {
}

impl TermSetQuery {
fn has_fast_only_field(&self, context: &BuildTantivyAstContext) -> bool {
for full_path in self.terms_per_field.keys() {
if let Some((_, field_entry, _)) =
super::utils::find_field_or_hit_dynamic(full_path, context.schema)
&& field_entry.is_fast()
&& !field_entry.is_indexed()
{
return true;
}
}
false
}

fn build_bool_query(
&self,
context: &BuildTantivyAstContext,
) -> Result<TantivyQueryAst, InvalidQuery> {
let should_clauses = self
.terms_per_field
.iter()
.flat_map(|(full_path, values)| {
values.iter().map(|value| {
QueryAst::Term(TermQuery {
field: full_path.to_string(),
value: value.to_string(),
})
})
})
.collect();

let bool_query = BoolQuery {
should: should_clauses,
..Default::default()
};

bool_query.build_tantivy_ast_impl(context)
}

fn build_term_set_query(
&self,
context: &BuildTantivyAstContext,
) -> Result<TantivyQueryAst, InvalidQuery> {
let terms_it = self.make_term_iterator(context)?;
let term_set_query = tantivy::query::TermSetQuery::new(terms_it);
Ok(term_set_query.into())
}

fn make_term_iterator(
&self,
context: &BuildTantivyAstContext,
Expand Down Expand Up @@ -67,9 +114,11 @@ impl BuildTantivyAst for TermSetQuery {
&self,
context: &BuildTantivyAstContext,
) -> Result<TantivyQueryAst, InvalidQuery> {
let terms_it = self.make_term_iterator(context)?;
let term_set_query = tantivy::query::TermSetQuery::new(terms_it);
Ok(term_set_query.into())
if self.has_fast_only_field(context) {
self.build_bool_query(context)
} else {
self.build_term_set_query(context)
}
}
}

Expand All @@ -78,3 +127,67 @@ impl From<TermSetQuery> for QueryAst {
QueryAst::TermSet(term_set_query)
}
}

#[cfg(test)]
mod tests {
use std::collections::{BTreeSet, HashMap};

use tantivy::schema::{FAST, INDEXED, Schema};

use super::TermSetQuery;
use crate::query_ast::{BuildTantivyAst, BuildTantivyAstContext};

#[test]
fn test_term_set_query_with_fast_only_field_returns_bool_query() {
let mut schema_builder = Schema::builder();
schema_builder.add_u64_field("fast_field", FAST);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we keep things that way, i think a test with mixed "one fast field and one indexed field" would be nice to have
given this is a workaround and we may update tantivy so it's no longer needed, it's not really needed though

let schema = schema_builder.build();

let terms_per_field = HashMap::from([(
"fast_field".to_string(),
BTreeSet::from(["1".to_string(), "2".to_string()]),
)]);
let term_set_query = TermSetQuery { terms_per_field };

let tantivy_query_ast = term_set_query
.build_tantivy_ast_call(&BuildTantivyAstContext::for_test(&schema))
.unwrap();

let bool_query = tantivy_query_ast
.as_bool_query()
.expect("Expected BoolQuery for fast-only field, but got a different query type");
assert_eq!(bool_query.should.len(), 2);
assert_eq!(bool_query.must.len(), 0);
assert_eq!(bool_query.must_not.len(), 0);
assert_eq!(bool_query.filter.len(), 0);
}

#[test]
fn test_term_set_query_with_indexed_field_uses_term_set() {
let mut schema_builder = Schema::builder();
schema_builder.add_u64_field("indexed_field", FAST | INDEXED);
let schema = schema_builder.build();

let terms_per_field = HashMap::from([(
"indexed_field".to_string(),
BTreeSet::from(["1".to_string(), "2".to_string()]),
)]);
let term_set_query = TermSetQuery { terms_per_field };

let tantivy_query_ast = term_set_query
.build_tantivy_ast_call(&BuildTantivyAstContext::for_test(&schema))
.unwrap();

// Should return a leaf query (TermSetQuery wrapped in TantivyQueryAst)
let leaf = tantivy_query_ast
.as_leaf()
.expect("Expected a leaf query (TermSetQuery), but got a complex query");

// Verify it's a TermSetQuery by checking the debug representation
let debug_str = format!("{leaf:?}");
assert!(
debug_str.contains("TermSetQuery"),
"Expected TermSetQuery, got: {debug_str}"
);
}
}