Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 42 additions & 10 deletions docs/reference/es_compatible_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -695,10 +695,11 @@ When working on text, it is recommended to only use `term` queries on fields con

#### Supported Parameters

| Variable | Type | Description | Default |
| -------- | -------- | ---------------------------------------------------------------------------- | ------- |
| `value` | String | Term value. This is the string representation of a token after tokenization. | - |
| `boost` | `Number` | Multiplier boost for score computation | 1.0 |
| Variable | Type | Description | Default |
| ------------------ | ------- | ---------------------------------------------------------------------------- | ------- |
| `value` | String | Term value. This is the string representation of a token after tokenization. | - |
| `boost` | Number | Multiplier boost for score computation | 1.0 |
| `case_insensitive` | Boolean | Allows ASCII case insensitive matching of the value. | false |



Expand Down Expand Up @@ -763,9 +764,10 @@ Returns documents that contain a specific prefix in a provided field.

#### Supported Parameters

| Variable | Type | Description | Default |
| -------- | ------ | ----------------------------------------------- | ------- |
| `value` | String | Beginning characters of terms you wish to find. | - |
| Variable | Type | Description | Default |
| ------------------ | ------- | ---------------------------------------------------- | ------- |
| `value` | String | Beginning characters of terms you wish to find. | - |
| `case_insensitive` | Boolean | Allows ASCII case insensitive matching of the value. | false |

### `wildcard`

Expand All @@ -791,9 +793,39 @@ Returns documents that contain terms matching a wildcard pattern:

#### Supported Parameters

| Variable | Type | Description | Default |
| -------- | ------ | -------------------------------------------- | ------- |
| `value` | String | Wildcard pattern for terms you wish to find. | - |
| Variable | Type | Description | Default |
| ------------------ | ------- | ---------------------------------------------------- | ------- |
| `value` | String | Wildcard pattern for terms you wish to find. | - |
| `boost` | Number | Multiplier boost for score computation. | 1.0 |
| `case_insensitive` | Boolean | Allows ASCII case insensitive matching of the value. | false |


### `regexp`

[Elasticsearch reference documentation](https://www.elastic.co/guide/en/elasticsearch/reference/8.8/query-dsl-regexp-query.html)

Returns documents that contain terms matching a regular expression.

#### Example

```json
{
"query": {
"regexp": {
"author.login" {
"value": "adm.*n",
}
}
}
}
```

#### Supported Parameters

| Variable | Type | Description | Default |
| ------------------ | ------- | ---------------------------------------------------- | ------- |
| `value`            | String  | Regular expression pattern for terms you wish to find. | -     |
| `case_insensitive` | Boolean | Allows ASCII case insensitive matching of the value. | false |


### About the `lenient` argument
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ pub(crate) struct PrefixQuery {
#[serde(deny_unknown_fields)]
pub struct PrefixQueryParams {
value: String,
#[serde(default)]
case_insensitive: bool,
}

impl ConvertibleToQueryAst for PrefixQuery {
Expand All @@ -45,6 +47,7 @@ impl ConvertibleToQueryAst for PrefixQuery {
field: self.field,
value: wildcard,
lenient: true,
case_insensitive: self.params.case_insensitive,
}
.into())
}
Expand All @@ -64,7 +67,10 @@ impl From<OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>> for Pr

impl From<String> for PrefixQueryParams {
    /// Builds prefix-query parameters from a bare string value.
    ///
    /// Matching defaults to case-sensitive (`case_insensitive: false`),
    /// which is the documented default for the short string form.
    fn from(value: String) -> PrefixQueryParams {
        PrefixQueryParams {
            value,
            case_insensitive: false,
        }
    }
}

Expand Down
10 changes: 8 additions & 2 deletions quickwit/quickwit-query/src/elastic_query_dsl/regex_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,22 @@ use crate::query_ast::{QueryAst, RegexQuery as AstRegexQuery};
#[serde(deny_unknown_fields)]
pub struct RegexQueryParams {
value: String,
// we could probably add case_insensitive
#[serde(default)]
case_insensitive: bool,
}

pub type RegexQuery = OneFieldMap<RegexQueryParams>;

impl ConvertibleToQueryAst for RegexQuery {
fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> {
let regex = if self.value.case_insensitive {
format!("(?i){}", self.value.value)
} else {
self.value.value.clone()
};
Ok(AstRegexQuery {
field: self.field,
regex: self.value.value,
regex,
}
.into())
}
Expand Down
19 changes: 18 additions & 1 deletion quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ impl From<String> for TermQueryParams {
TermQueryParams {
value: query,
boost: None,
case_insensitive: false,
}
}
}
Expand Down Expand Up @@ -70,6 +71,8 @@ pub struct TermQueryParams {
pub value: String,
#[serde(default)]
pub boost: Option<NotNaNf32>,
#[serde(default)]
case_insensitive: bool,
}

pub fn term_query_from_field_value(field: impl ToString, value: impl ToString) -> TermQuery {
Expand All @@ -78,6 +81,7 @@ pub fn term_query_from_field_value(field: impl ToString, value: impl ToString) -
value: TermQueryParams {
value: value.to_string(),
boost: None,
case_insensitive: false,
},
}
}
Expand All @@ -90,7 +94,20 @@ impl From<TermQuery> for ElasticQueryDslInner {

impl ConvertibleToQueryAst for TermQuery {
fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> {
let TermQueryParams { value, boost } = self.value;
let TermQueryParams {
value,
boost,
case_insensitive,
} = self.value;
if case_insensitive {
let ci_value = format!("(?i){}", regex::escape(&value));
let term_ast: QueryAst = query_ast::RegexQuery {
field: self.field,
regex: ci_value,
}
.into();
return Ok(term_ast.boost(boost));
}
let term_ast: QueryAst = query_ast::TermQuery {
field: self.field,
value,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ pub struct WildcardQueryParams {
value: String,
#[serde(default)]
pub boost: Option<NotNaNf32>,
#[serde(default)]
case_insensitive: bool,
}

impl ConvertibleToQueryAst for WildcardQuery {
Expand All @@ -40,6 +42,7 @@ impl ConvertibleToQueryAst for WildcardQuery {
field: self.field,
value: self.params.value,
lenient: true,
case_insensitive: self.params.case_insensitive,
}
.into();
Ok(wildcard_ast.boost(self.params.boost))
Expand All @@ -60,7 +63,11 @@ impl From<OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>> for

impl From<String> for WildcardQueryParams {
    /// Builds wildcard-query parameters from a bare string pattern.
    ///
    /// Uses the documented defaults: no score boost and case-sensitive
    /// matching (`case_insensitive: false`).
    fn from(value: String) -> WildcardQueryParams {
        WildcardQueryParams {
            value,
            boost: None,
            case_insensitive: false,
        }
    }
}

Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-query/src/query_ast/user_input_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ fn convert_user_input_literal(
field: field_name,
value: phrase.clone(),
lenient,
case_insensitive: false,
}
.into()
} else {
Expand Down
58 changes: 58 additions & 0 deletions quickwit/quickwit-query/src/query_ast/wildcard_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub struct WildcardQuery {
pub value: String,
/// Support missing fields
pub lenient: bool,
pub case_insensitive: bool,
}

impl From<WildcardQuery> for QueryAst {
Expand Down Expand Up @@ -133,6 +134,11 @@ impl WildcardQuery {
let tokenizer_name = text_field_indexing.tokenizer();
let regex =
sub_query_parts_to_regex(sub_query_parts, tokenizer_name, tokenizer_manager)?;
let regex = if self.case_insensitive {
format!("(?i){}", regex)
} else {
regex
};

Ok((field, None, regex))
}
Expand All @@ -147,6 +153,11 @@ impl WildcardQuery {
let tokenizer_name = text_field_indexing.tokenizer();
let regex =
sub_query_parts_to_regex(sub_query_parts, tokenizer_name, tokenizer_manager)?;
let regex = if self.case_insensitive {
format!("(?i){}", regex)
} else {
regex
};

let mut term_for_path = Term::from_field_json_path(
field,
Expand Down Expand Up @@ -219,6 +230,7 @@ mod tests {
field: "text_field".to_string(),
value: "MyString Wh1ch?a.nOrMal Tokenizer would*cut".to_string(),
lenient: false,
case_insensitive: false,
};

let tokenizer_manager = create_default_quickwit_tokenizer_manager();
Expand Down Expand Up @@ -261,6 +273,7 @@ mod tests {
field: "text_field".to_string(),
value: "MyString Wh1ch\\?a.nOrMal Tokenizer would\\*cut".to_string(),
lenient: false,
case_insensitive: false,
};

let tokenizer_manager = create_default_quickwit_tokenizer_manager();
Expand Down Expand Up @@ -305,6 +318,7 @@ mod tests {
field: "json_field.Inner.Fie*ld".to_string(),
value: "MyString Wh1ch?a.nOrMal Tokenizer would*cut".to_string(),
lenient: false,
case_insensitive: false,
};

let tokenizer_manager = create_default_quickwit_tokenizer_manager();
Expand Down Expand Up @@ -347,6 +361,7 @@ mod tests {
field: "my_missing_field".to_string(),
value: "My query value*".to_string(),
lenient: false,
case_insensitive: false,
};
let tokenizer_manager = create_default_quickwit_tokenizer_manager();
let schema = single_text_field_schema("my_field", "whitespace");
Expand All @@ -359,4 +374,47 @@ mod tests {
};
assert_eq!(missing_field_full_path, "my_missing_field");
}

#[test]
fn test_wildcard_query_to_regex_on_text_case_insensitive() {
    // A case-insensitive wildcard query must always produce a regex carrying
    // the `(?i)` flag, whatever tokenizer the target text field uses.
    let query = WildcardQuery {
        field: "text_field".to_string(),
        value: "MyString Wh1ch?a.nOrMal Tokenizer would*cut".to_string(),
        lenient: false,
        case_insensitive: true,
    };
    let tokenizer_manager = create_default_quickwit_tokenizer_manager();

    // Tokenizer groups paired with the regex they are expected to yield:
    // case-preserving tokenizers keep the original casing, while the
    // lowercasing family folds the pattern before the `(?i)` flag is applied.
    let expectations: [(&[&str], &str); 2] = [
        (
            &["raw", "whitespace"],
            "(?i)MyString Wh1ch.a\\.nOrMal Tokenizer would.*cut",
        ),
        (
            &[
                "raw_lowercase",
                "lowercase",
                "default",
                "en_stem",
                "chinese_compatible",
                "source_code_default",
                "source_code_with_hex",
            ],
            "(?i)mystring wh1ch.a\\.normal tokenizer would.*cut",
        ),
    ];

    for (tokenizers, expected_regex) in expectations {
        for &tokenizer in tokenizers {
            let mut schema_builder = TantivySchema::builder();
            let text_options = TextOptions::default()
                .set_indexing_options(TextFieldIndexing::default().set_tokenizer(tokenizer));
            schema_builder.add_text_field("text_field", text_options);
            let schema = schema_builder.build();

            let (_field, path, regex) = query.to_regex(&schema, &tokenizer_manager).unwrap();
            assert_eq!(regex, expected_regex);
            assert!(path.is_none());
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# case_insensitive not supported.
engines: ["elasticsearch"]
params:
# this overrides the query sent in body apparently
size: 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,25 @@ expected:
hits:
total:
value: 2
---
json:
query:
wildcard:
repo.name:
value: RUS*
case_insensitive: true
expected:
hits:
total:
value: 1
---
json:
query:
wildcard:
repo.name:
value: RUS*
case_insensitive: false
expected:
hits:
total:
value: 0
22 changes: 22 additions & 0 deletions quickwit/rest-api-tests/scenarii/es_compatibility/0030-prefix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,25 @@ expected:
hits:
total:
value: 2
---
json:
query:
prefix:
repo.name:
value: RUST
case_insensitive: true
expected:
hits:
total:
value: 1
---
json:
query:
prefix:
repo.name:
value: RUST
case_insensitive: false
expected:
hits:
total:
value: 0
Loading