Skip to content

Commit bfed5c6

Browse files
authored
feat: Prefix and Wildcard in Elastic DSL (#6000)
* feat: Prefix and Wildcard in Elastic DSL Signed-off-by: Darkheir <[email protected]> * chore: Add licence header Signed-off-by: Darkheir <[email protected]> * feat: Add integration tests Signed-off-by: Darkheir <[email protected]> * feat: Add support for short request format Signed-off-by: Darkheir <[email protected]> * Apply PR review sugestion Signed-off-by: Darkheir <[email protected]> --------- Signed-off-by: Darkheir <[email protected]>
1 parent 660388a commit bfed5c6

File tree

7 files changed

+386
-0
lines changed

7 files changed

+386
-0
lines changed

docs/reference/es_compatible_api.md

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,60 @@ Query matching only documents containing a non-null value for a given field.
741741
| -------- | ------ | ------------------------------------------------------- | ------- |
742742
| `field` | String | Only documents with a value for field will be returned. | - |
743743

744+
### `prefix`
745+
746+
[Elasticsearch reference documentation](https://www.elastic.co/guide/en/elasticsearch/reference/8.8/query-dsl-prefix-query.html)
747+
748+
Returns documents that contain a specific prefix in a provided field.
749+
750+
#### Example
751+
752+
```json
{
  "query": {
    "prefix": {
      "author.login": {
        "value": "adm"
      }
    }
  }
}
```
763+
764+
#### Supported Parameters
765+
766+
| Variable | Type | Description | Default |
767+
| -------- | ------ | ----------------------------------------------- | ------- |
768+
| `value` | String | Beginning characters of terms you wish to find. | - |
769+
770+
### `wildcard`
771+
772+
[Elasticsearch reference documentation](https://www.elastic.co/guide/en/elasticsearch/reference/8.8/query-dsl-wildcard-query.html)
773+
774+
Returns documents that contain terms matching a wildcard pattern:
775+
* `?` replaces one and only one term character
776+
* `*` replaces any number of term characters or an empty string
777+
778+
#### Example
779+
780+
```json
{
  "query": {
    "wildcard": {
      "author.login": {
        "value": "adm?n*"
      }
    }
  }
}
```
791+
792+
#### Supported Parameters
793+
794+
| Variable | Type | Description | Default |
795+
| -------- | ------ | -------------------------------------------- | ------- |
796+
| `value` | String | Wildcard pattern for terms you wish to find. | - |
797+
744798

745799
### About the `lenient` argument
746800

quickwit/quickwit-query/src/elastic_query_dsl/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,19 @@ mod match_query;
2222
mod multi_match;
2323
mod one_field_map;
2424
mod phrase_prefix_query;
25+
mod prefix_query;
2526
mod query_string_query;
2627
mod range_query;
2728
mod regex_query;
2829
mod string_or_struct;
2930
mod term_query;
3031
mod terms_query;
32+
mod wildcard_query;
3133

3234
use bool_query::BoolQuery;
3335
pub use one_field_map::OneFieldMap;
3436
use phrase_prefix_query::MatchPhrasePrefixQuery;
37+
use prefix_query::PrefixQuery;
3538
pub(crate) use query_string_query::QueryStringQuery;
3639
use range_query::RangeQuery;
3740
pub(crate) use string_or_struct::StringOrStructForSerialization;
@@ -44,6 +47,7 @@ use crate::elastic_query_dsl::match_query::MatchQuery;
4447
use crate::elastic_query_dsl::multi_match::MultiMatchQuery;
4548
use crate::elastic_query_dsl::regex_query::RegexQuery;
4649
use crate::elastic_query_dsl::terms_query::TermsQuery;
50+
use crate::elastic_query_dsl::wildcard_query::WildcardQuery;
4751
use crate::not_nan_f32::NotNaNf32;
4852
use crate::query_ast::QueryAst;
4953

@@ -85,6 +89,8 @@ pub(crate) enum ElasticQueryDslInner {
8589
Range(RangeQuery),
8690
Exists(ExistsQuery),
8791
Regexp(RegexQuery),
92+
Wildcard(WildcardQuery),
93+
Prefix(PrefixQuery),
8894
}
8995

9096
#[derive(Deserialize, Debug, Eq, PartialEq, Clone)]
@@ -133,6 +139,8 @@ impl ConvertibleToQueryAst for ElasticQueryDslInner {
133139
Self::Exists(exists_query) => exists_query.convert_to_query_ast(),
134140
Self::MultiMatch(multi_match_query) => multi_match_query.convert_to_query_ast(),
135141
Self::Regexp(regex_query) => regex_query.convert_to_query_ast(),
142+
Self::Wildcard(wildcard_query) => wildcard_query.convert_to_query_ast(),
143+
Self::Prefix(prefix_query) => prefix_query.convert_to_query_ast(),
136144
}
137145
}
138146
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Copyright 2021-Present Datadog, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use serde::Deserialize;
16+
17+
use crate::elastic_query_dsl::one_field_map::OneFieldMap;
18+
use crate::elastic_query_dsl::{ConvertibleToQueryAst, StringOrStructForSerialization};
19+
use crate::query_ast::{QueryAst, WildcardQuery as AstWildcardQuery};
20+
21+
/// Elasticsearch DSL `prefix` query: one target field plus its parameters.
///
/// Serde does not deserialize this struct field-by-field. Per the `from`
/// attribute below, it first reads a
/// `OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>` and then
/// converts it into a `PrefixQuery` through the matching `From` impl.
#[derive(Deserialize, Clone, Eq, PartialEq, Debug)]
22+
#[serde(from = "OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>")]
23+
pub(crate) struct PrefixQuery {
24+
// Name of the field the prefix is searched in.
pub(crate) field: String,
25+
// Parameters supplied for that field.
pub(crate) params: PrefixQueryParams,
26+
}
27+
28+
/// Parameters of a `prefix` query.
///
/// `deny_unknown_fields` makes deserialization fail on any key other than
/// `value`.
#[derive(Deserialize, Debug, Default, Eq, PartialEq, Clone)]
29+
#[serde(deny_unknown_fields)]
30+
pub struct PrefixQueryParams {
31+
// Literal prefix text. `\`, `*` and `?` in it are escaped when the query is
// converted to a wildcard pattern, so they are matched as plain characters.
value: String,
32+
}
33+
34+
impl ConvertibleToQueryAst for PrefixQuery {
35+
fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> {
36+
let wildcard = format!(
37+
"{}*",
38+
self.params
39+
.value
40+
.replace(r"\", r"\\")
41+
.replace("*", r"\*")
42+
.replace("?", r"\?")
43+
);
44+
Ok(AstWildcardQuery {
45+
field: self.field,
46+
value: wildcard,
47+
lenient: true,
48+
}
49+
.into())
50+
}
51+
}
52+
53+
impl From<OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>> for PrefixQuery {
54+
fn from(
55+
match_query_params: OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>,
56+
) -> Self {
57+
let OneFieldMap { field, value } = match_query_params;
58+
PrefixQuery {
59+
field,
60+
params: value.inner,
61+
}
62+
}
63+
}
64+
65+
impl From<String> for PrefixQueryParams {
66+
fn from(value: String) -> PrefixQueryParams {
67+
PrefixQueryParams { value }
68+
}
69+
}
70+
71+
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain prefix becomes a lenient wildcard query with a trailing `*`.
    #[test]
    fn test_prefix_query_convert_to_query_ast() {
        let prefix_query_json = r#"{
            "user_name": {
                "value": "john"
            }
        }"#;
        let prefix_query: PrefixQuery = serde_json::from_str(prefix_query_json).unwrap();
        let query_ast = prefix_query.convert_to_query_ast().unwrap();

        // A prefix query is lowered to `QueryAst::Wildcard`; there is no
        // dedicated prefix variant, so the failure message names the variant
        // that is actually expected.
        match query_ast {
            QueryAst::Wildcard(prefix) => {
                assert_eq!(prefix.field, "user_name");
                assert_eq!(prefix.value, "john*");
                assert!(prefix.lenient);
            }
            other => panic!("Expected QueryAst::Wildcard, got {:?}", other),
        }
    }

    /// `\`, `?` and `*` inside the prefix are escaped; only the appended `*`
    /// stays an active wildcard.
    #[test]
    fn test_prefix_query_convert_to_query_ast_special_chars() {
        let prefix_query_json = r#"{
            "user_name": {
                "value": "a\\dm?n*"
            }
        }"#;
        let prefix_query: PrefixQuery = serde_json::from_str(prefix_query_json).unwrap();
        let query_ast = prefix_query.convert_to_query_ast().unwrap();

        match query_ast {
            QueryAst::Wildcard(prefix) => {
                assert_eq!(prefix.field, "user_name");
                assert_eq!(prefix.value, r"a\\dm\?n\**");
                assert!(prefix.lenient);
            }
            other => panic!("Expected QueryAst::Wildcard, got {:?}", other),
        }
    }
}
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// Copyright 2021-Present Datadog, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use serde::Deserialize;
16+
17+
use crate::NotNaNf32;
18+
use crate::elastic_query_dsl::one_field_map::OneFieldMap;
19+
use crate::elastic_query_dsl::{ConvertibleToQueryAst, StringOrStructForSerialization};
20+
use crate::query_ast::{QueryAst, WildcardQuery as AstWildcardQuery};
21+
22+
/// Elasticsearch DSL `wildcard` query: one target field plus its parameters.
///
/// Serde does not deserialize this struct field-by-field. Per the `from`
/// attribute below, it first reads a
/// `OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>` and then
/// converts it into a `WildcardQuery` through the matching `From` impl.
#[derive(Deserialize, Clone, Eq, PartialEq, Debug)]
23+
#[serde(from = "OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>")]
24+
pub(crate) struct WildcardQuery {
25+
// Name of the field the pattern is matched against.
pub(crate) field: String,
26+
// Parameters supplied for that field.
pub(crate) params: WildcardQueryParams,
27+
}
28+
29+
/// Parameters of a `wildcard` query.
///
/// `deny_unknown_fields` makes deserialization fail on any key other than
/// `value` and `boost`.
#[derive(Deserialize, Debug, Default, Eq, PartialEq, Clone)]
30+
#[serde(deny_unknown_fields)]
31+
pub struct WildcardQueryParams {
32+
// Wildcard pattern, forwarded unchanged to the AST wildcard query.
value: String,
33+
// Optional boost; `None` when the key is absent from the request.
#[serde(default)]
34+
pub boost: Option<NotNaNf32>,
35+
}
36+
37+
impl ConvertibleToQueryAst for WildcardQuery {
38+
fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> {
39+
let wildcard_ast: QueryAst = AstWildcardQuery {
40+
field: self.field,
41+
value: self.params.value,
42+
lenient: true,
43+
}
44+
.into();
45+
Ok(wildcard_ast.boost(self.params.boost))
46+
}
47+
}
48+
49+
impl From<OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>> for WildcardQuery {
50+
fn from(
51+
match_query_params: OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>,
52+
) -> Self {
53+
let OneFieldMap { field, value } = match_query_params;
54+
WildcardQuery {
55+
field,
56+
params: value.inner,
57+
}
58+
}
59+
}
60+
61+
impl From<String> for WildcardQueryParams {
62+
fn from(value: String) -> WildcardQueryParams {
63+
WildcardQueryParams { value, boost: None }
64+
}
65+
}
66+
67+
#[cfg(test)]
mod tests {
    use super::*;

    /// Without a boost, the conversion yields a bare lenient wildcard query
    /// carrying the pattern unchanged.
    #[test]
    fn test_wildcard_query_convert_to_query_ast() {
        let wildcard_query_json = r#"{
            "user_name": {
                "value": "john*"
            }
        }"#;
        let wildcard_query: WildcardQuery = serde_json::from_str(wildcard_query_json).unwrap();
        let query_ast = wildcard_query.convert_to_query_ast().unwrap();

        match query_ast {
            QueryAst::Wildcard(wildcard) => {
                assert_eq!(wildcard.field, "user_name");
                assert_eq!(wildcard.value, "john*");
                assert!(wildcard.lenient);
            }
            other => panic!("Expected QueryAst::Wildcard, got {:?}", other),
        }
    }

    /// With a boost, the wildcard query is wrapped in `QueryAst::Boost`.
    #[test]
    fn test_boosted_wildcard_query_convert_to_query_ast() {
        let wildcard_query_json = r#"{
            "user_name": {
                "value": "john*",
                "boost": 2.0
            }
        }"#;
        let wildcard_query: WildcardQuery = serde_json::from_str(wildcard_query_json).unwrap();
        let query_ast = wildcard_query.convert_to_query_ast().unwrap();

        match query_ast {
            QueryAst::Boost { underlying, boost } => {
                match *underlying {
                    QueryAst::Wildcard(wildcard) => {
                        assert_eq!(wildcard.field, "user_name");
                        assert_eq!(wildcard.value, "john*");
                        assert!(wildcard.lenient);
                    }
                    other => panic!("Expected underlying QueryAst::Wildcard, got {:?}", other),
                }
                assert_eq!(boost, NotNaNf32::try_from(2.0).unwrap());
            }
            // The outer node must be the boost wrapper, so report that
            // variant rather than the inner wildcard.
            other => panic!("Expected QueryAst::Boost, got {:?}", other),
        }
    }
}

quickwit/quickwit-query/src/query_ast/wildcard_query.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,48 @@ mod tests {
255255
}
256256
}
257257

258+
#[test]
fn test_wildcard_query_to_regex_on_escaped_text() {
    // The pattern contains an escaped `?` and an escaped `*`; both must stay
    // escaped in the generated regex, and the literal `.` must gain an escape.
    let query = WildcardQuery {
        field: "text_field".to_string(),
        value: "MyString Wh1ch\\?a.nOrMal Tokenizer would\\*cut".to_string(),
        lenient: false,
    };
    let tokenizer_manager = create_default_quickwit_tokenizer_manager();

    // Tokenizers grouped by the regex they are expected to produce: the first
    // group keeps the original casing, the second yields lowercase text.
    let case_preserving_tokenizers: &[&str] = &["raw", "whitespace"];
    let lowercasing_tokenizers: &[&str] = &[
        "raw_lowercase",
        "lowercase",
        "default",
        "en_stem",
        "chinese_compatible",
        "source_code_default",
        "source_code_with_hex",
    ];
    let expectations = [
        (
            case_preserving_tokenizers,
            "MyString Wh1ch\\?a\\.nOrMal Tokenizer would\\*cut",
        ),
        (
            lowercasing_tokenizers,
            "mystring wh1ch\\?a\\.normal tokenizer would\\*cut",
        ),
    ];

    for (tokenizers, expected_regex) in expectations {
        for &tokenizer in tokenizers {
            let indexing = TextFieldIndexing::default().set_tokenizer(tokenizer);
            let text_options = TextOptions::default().set_indexing_options(indexing);
            let mut schema_builder = TantivySchema::builder();
            schema_builder.add_text_field("text_field", text_options);
            let schema = schema_builder.build();

            let (_field, path, regex) = query.to_regex(&schema, &tokenizer_manager).unwrap();
            assert_eq!(regex, expected_regex);
            assert!(path.is_none());
        }
    }
}
299+
258300
#[test]
259301
fn test_wildcard_query_to_regex_on_json() {
260302
let query = WildcardQuery {

0 commit comments

Comments
 (0)