Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions quickwit/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions quickwit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ async-trait = "0.1"
backtrace = "0.3"
base64 = "0.22"
binggan = { version = "0.14" }
bitpacking = "0.9.2"
bytes = { version = "1", features = ["serde"] }
bytesize = { version = "1.3", features = ["serde"] }
bytestring = "1.4"
Expand Down
2 changes: 2 additions & 0 deletions quickwit/quickwit-config/src/node_config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ pub struct SearcherConfig {
pub fast_field_cache_capacity: ByteSize,
pub split_footer_cache_capacity: ByteSize,
pub partial_request_cache_capacity: ByteSize,
pub predicate_cache_capacity: ByteSize,
pub max_num_concurrent_split_searches: usize,
// Deprecated: stream search requests are no longer supported.
#[serde(alias = "max_num_concurrent_split_streams", default, skip_serializing)]
Expand Down Expand Up @@ -324,6 +325,7 @@ impl Default for SearcherConfig {
fast_field_cache_capacity: ByteSize::gb(1),
split_footer_cache_capacity: ByteSize::mb(500),
partial_request_cache_capacity: ByteSize::mb(64),
predicate_cache_capacity: ByteSize::mb(256),
max_num_concurrent_split_searches: 100,
_max_num_concurrent_split_streams: None,
aggregation_memory_limit: ByteSize::mb(500),
Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-config/src/node_config/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,7 @@ mod tests {
fast_field_cache_capacity: ByteSize::gb(10),
split_footer_cache_capacity: ByteSize::gb(1),
partial_request_cache_capacity: ByteSize::mb(64),
predicate_cache_capacity: ByteSize::mb(256),
max_num_concurrent_split_searches: 150,
_max_num_concurrent_split_streams: Some(serde::de::IgnoredAny),
split_cache: None,
Expand Down
19 changes: 12 additions & 7 deletions quickwit/quickwit-doc-mapper/src/doc_mapper/doc_mapper_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@

use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::num::NonZeroU32;
use std::sync::Arc;

use anyhow::{Context, bail};
use fnv::FnvHashSet;
use quickwit_proto::types::DocMappingUid;
use quickwit_query::create_default_quickwit_tokenizer_manager;
use quickwit_query::query_ast::QueryAst;
use quickwit_query::query_ast::{BuildTantivyAstContext, QueryAst};
use quickwit_query::tokenizers::TokenizerManager;
use serde::{Deserialize, Serialize};
use serde_json::{self, Value as JsonValue};
Expand Down Expand Up @@ -636,15 +637,19 @@ impl DocMapper {
pub fn query(
&self,
split_schema: Schema,
query_ast: &QueryAst,
query_ast: QueryAst,
with_validation: bool,
cache_context: Option<(Arc<dyn quickwit_query::query_ast::PredicateCache>, String)>,
) -> Result<(Box<dyn Query>, WarmupInfo), QueryParserError> {
build_query(
query_ast,
split_schema,
self.tokenizer_manager(),
&self.default_search_field_names[..],
with_validation,
&BuildTantivyAstContext {
schema: &split_schema,
tokenizer_manager: self.tokenizer_manager(),
search_fields: &self.default_search_field_names[..],
with_validation,
},
cache_context,
)
}

Expand Down Expand Up @@ -2068,7 +2073,7 @@ mod tests {
.parse_user_query(doc_mapper.default_search_fields())
.map_err(|err| err.to_string())?;
let (query, _) = doc_mapper
.query(doc_mapper.schema(), &query_ast, true)
.query(doc_mapper.schema(), query_ast, true, None)
.map_err(|err| err.to_string())?;
Ok(format!("{query:?}"))
}
Expand Down
8 changes: 4 additions & 4 deletions quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ mod tests {
}
.parse_user_query(&[])
.unwrap();
let (query, _) = doc_mapper.query(schema, &query_ast, true).unwrap();
let (query, _) = doc_mapper.query(schema, query_ast, true, None).unwrap();
assert_eq!(
format!("{query:?}"),
r#"TermQuery(Term(field=2, type=Json, path=toto.titi, type=Str, "hello"))"#
Expand All @@ -304,7 +304,7 @@ mod tests {
let query_ast = query_ast_from_user_text("toto.titi:hello", None)
.parse_user_query(doc_mapper.default_search_fields())
.unwrap();
let (query, _) = doc_mapper.query(schema, &query_ast, true).unwrap();
let (query, _) = doc_mapper.query(schema, query_ast, true, None).unwrap();
assert_eq!(
format!("{query:?}"),
r#"TermQuery(Term(field=1, type=Json, path=toto.titi, type=Str, "hello"))"#
Expand All @@ -318,7 +318,7 @@ mod tests {
let query_ast = query_ast_from_user_text("toto:5", None)
.parse_user_query(&[])
.unwrap();
let (query, _) = doc_mapper.query(schema, &query_ast, true).unwrap();
let (query, _) = doc_mapper.query(schema, query_ast, true, None).unwrap();
assert_eq!(
format!("{query:?}"),
r#"BooleanQuery { subqueries: [(Should, TermQuery(Term(field=1, type=Json, path=toto, type=I64, 5))), (Should, TermQuery(Term(field=1, type=Json, path=toto, type=Str, "5")))], minimum_number_should_match: 1 }"#
Expand Down Expand Up @@ -857,7 +857,7 @@ mod tests {
field: "multilang".to_string(),
value: "JPN:す".to_string(),
});
let (query, _) = doc_mapper.query(schema, &query_ast, false).unwrap();
let (query, _) = doc_mapper.query(schema, query_ast, false, None).unwrap();
assert_eq!(
format!("{query:?}"),
r#"TermQuery(Term(field=2, type=Str, "JPN:す"))"#
Expand Down
83 changes: 37 additions & 46 deletions quickwit/quickwit-doc-mapper/src/query_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
use std::collections::{HashMap, HashSet};
use std::convert::Infallible;
use std::ops::Bound;
use std::sync::Arc;

use quickwit_proto::types::SplitId;
use quickwit_query::query_ast::{
FieldPresenceQuery, FullTextQuery, PhrasePrefixQuery, QueryAst, QueryAstVisitor, RangeQuery,
RegexQuery, TermSetQuery, WildcardQuery,
BuildTantivyAstContext, FieldPresenceQuery, FullTextQuery, PhrasePrefixQuery, QueryAst,
QueryAstTransformer, QueryAstVisitor, RangeQuery, RegexQuery, TermSetQuery, WildcardQuery,
};
use quickwit_query::tokenizers::TokenizerManager;
use quickwit_query::{InvalidQuery, find_field_or_hit_dynamic};
Expand Down Expand Up @@ -154,17 +156,24 @@ impl<'a, 'f> QueryAstVisitor<'a> for ExistsQueryFastFields<'f> {

/// Build a `Query` with field resolution & forbidding range clauses.
pub(crate) fn build_query(
query_ast: &QueryAst,
schema: Schema,
tokenizer_manager: &TokenizerManager,
search_fields: &[String],
with_validation: bool,
query_ast: QueryAst,
context: &BuildTantivyAstContext,
cache_context: Option<(Arc<dyn quickwit_query::query_ast::PredicateCache>, SplitId)>,
) -> Result<(Box<dyn Query>, WarmupInfo), QueryParserError> {
let mut fast_fields: HashSet<FastFieldWarmupInfo> = HashSet::new();

let query_ast = if let Some((cache, split_id)) = cache_context {
let Ok(query_ast) = quickwit_query::query_ast::PredicateCacheInjector { cache, split_id }
.transform(query_ast);
// this transformer isn't supposed to ever remove a node
query_ast.unwrap_or(QueryAst::MatchAll)
} else {
query_ast
};

let mut range_query_fields = RangeQueryFields::default();
// This cannot fail. The error type is Infallible.
let Ok(_) = range_query_fields.visit(query_ast);
let Ok(_) = range_query_fields.visit(&query_ast);
let range_query_fast_fields =
range_query_fields
.range_query_field_names
Expand All @@ -177,31 +186,30 @@ pub(crate) fn build_query(

let Ok(_) = TermSearchOnColumnar {
fields: &mut fast_fields,
schema: schema.clone(),
schema: context.schema.clone(),
}
.visit(query_ast);
.visit(&query_ast);

let Ok(_) = ExistsQueryFastFields {
fields: &mut fast_fields,
schema: schema.clone(),
schema: context.schema.clone(),
}
.visit(query_ast);
.visit(&query_ast);

let query = query_ast.build_tantivy_query(
&schema,
tokenizer_manager,
search_fields,
with_validation,
)?;
let query = query_ast.build_tantivy_query(context)?;

let term_set_query_fields = extract_term_set_query_fields(query_ast, &schema)?;
let term_set_query_fields = extract_term_set_query_fields(&query_ast, context.schema)?;
let (term_ranges_grouped_by_field, automatons_grouped_by_field) =
extract_prefix_term_ranges_and_automaton(query_ast, &schema, tokenizer_manager)?;
extract_prefix_term_ranges_and_automaton(
&query_ast,
context.schema,
context.tokenizer_manager,
)?;

let mut terms_grouped_by_field: HashMap<Field, HashMap<_, bool>> = Default::default();
query.query_terms(&mut |term, need_position| {
let field = term.field();
if !schema.get_field_entry(field).is_indexed() {
if !context.schema.get_field_entry(field).is_indexed() {
return;
}
*terms_grouped_by_field
Expand Down Expand Up @@ -419,8 +427,8 @@ mod test {

use quickwit_common::shared_consts::FIELD_PRESENCE_FIELD_NAME;
use quickwit_query::query_ast::{
FullTextMode, FullTextParams, PhrasePrefixQuery, QueryAstVisitor, UserInputQuery,
query_ast_from_user_text,
BuildTantivyAstContext, FullTextMode, FullTextParams, PhrasePrefixQuery, QueryAstVisitor,
UserInputQuery, query_ast_from_user_text,
};
use quickwit_query::{
BooleanOperand, MatchAllOrNone, create_default_quickwit_tokenizer_manager,
Expand Down Expand Up @@ -506,13 +514,7 @@ mod test {
.parse_user_query(&[])
.map_err(|err| err.to_string())?;
let schema = make_schema(dynamic_mode);
let query_result = build_query(
&query_ast,
schema,
&create_default_quickwit_tokenizer_manager(),
&[],
true,
);
let query_result = build_query(query_ast, &BuildTantivyAstContext::for_test(&schema), None);
query_result
.map(|query| format!("{query:?}"))
.map_err(|err| err.to_string())
Expand Down Expand Up @@ -886,29 +888,18 @@ mod test {
.parse_user_query(&[])
.unwrap();

let (_, warmup_info) = build_query(
&query_with_set,
make_schema(true),
&create_default_quickwit_tokenizer_manager(),
&[],
true,
)
.unwrap();
let schema = make_schema(true);
let context = BuildTantivyAstContext::for_test(&schema);

let (_, warmup_info) = build_query(query_with_set, &context, None).unwrap();
assert_eq!(warmup_info.term_dict_fields.len(), 1);
assert!(
warmup_info
.term_dict_fields
.contains(&tantivy::schema::Field::from_field_id(2))
);

let (_, warmup_info) = build_query(
&query_without_set,
make_schema(true),
&create_default_quickwit_tokenizer_manager(),
&[],
true,
)
.unwrap();
let (_, warmup_info) = build_query(query_without_set, &context, None).unwrap();
assert!(warmup_info.term_dict_fields.is_empty());
}

Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-doc-mapper/src/tag_pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ fn extract_unsimplified_tags_filter_ast(query_ast: QueryAst) -> UnsimplifiedTagF
}
QueryAst::FieldPresence(_) => UnsimplifiedTagFilterAst::Uninformative,
QueryAst::Regex(_) => UnsimplifiedTagFilterAst::Uninformative,
QueryAst::Cache(cache_node) => extract_unsimplified_tags_filter_ast(*cache_node.inner),
}
}

Expand Down
2 changes: 1 addition & 1 deletion quickwit/quickwit-indexing/src/actors/merge_executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ impl MergeExecutor {
parsed_query_ast
);
let (query, _) =
doc_mapper.query(union_index.schema(), &parsed_query_ast, false)?;
doc_mapper.query(union_index.schema(), parsed_query_ast, false, None)?;
index_writer.delete_query(query)?;
}
debug!("commit-delete-operations");
Expand Down
3 changes: 3 additions & 0 deletions quickwit/quickwit-query/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ license.workspace = true
[dependencies]
anyhow = { workspace = true }
base64 = { workspace = true }
bitpacking = { workspace = true }
hex = { workspace = true }
lindera-core = { workspace = true, optional = true }
lindera-dictionary = { workspace = true, optional = true }
Expand All @@ -24,13 +25,15 @@ serde_json = { workspace = true }
serde_with = { workspace = true }
tantivy = { workspace = true }
tantivy-fst = { workspace = true }
tracing = { workspace = true }
time = { workspace = true }
thiserror = { workspace = true }
rustc-hash = { workspace = true }
whichlang = { workspace = true, optional = true }

quickwit-common = { workspace = true }
quickwit-datetime = { workspace = true }
quickwit-proto = { workspace = true }

[dev-dependencies]
criterion = { workspace = true }
Expand Down
Loading