Skip to content

Commit aaa92ae

Browse files
HUSTERGS and gesong.samuel authored
Fix case insensitive query on wildcard field (opensearch-project#15882)
* fix case insensitive query on wildcard field

  Signed-off-by: gesong.samuel <gesong.samuel@bytedance.com>

* fix YAML test

  Signed-off-by: gesong.samuel <gesong.samuel@bytedance.com>

* add change log

  Signed-off-by: gesong.samuel <gesong.samuel@bytedance.com>

---------

Signed-off-by: gesong.samuel <gesong.samuel@bytedance.com>
Co-authored-by: gesong.samuel <gesong.samuel@bytedance.com>
1 parent 36c89bf commit aaa92ae

File tree

3 files changed

+49
-9
lines changed

3 files changed

+49
-9
lines changed

CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2525

2626
### Fixed
2727
- Fix wildcard query containing escaped character ([#15737](https://github.com/opensearch-project/OpenSearch/pull/15737))
28-
28+
- Fix case-insensitive query on wildcard field ([#15882](https://github.com/opensearch-project/OpenSearch/pull/15882))
2929
### Security
3030

3131
[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.17...2.x

rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml

+37-4
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@ setup:
5656
id: 6
5757
body:
5858
other_field: "test"
59+
- do:
60+
index:
61+
index: test
62+
id: 7
63+
body:
64+
my_field: "ABCD"
5965
- do:
6066
indices.refresh: {}
6167

@@ -90,8 +96,9 @@ setup:
9096
query:
9197
term:
9298
my_field.lower: "abcd"
93-
- match: { hits.total.value: 1 }
99+
- match: { hits.total.value: 2 }
94100
- match: { hits.hits.0._id: "5" }
101+
- match: { hits.hits.1._id: "7" }
95102

96103
- do:
97104
search:
@@ -100,8 +107,9 @@ setup:
100107
query:
101108
term:
102109
my_field.lower: "ABCD"
103-
- match: { hits.total.value: 1 }
110+
- match: { hits.total.value: 2 }
104111
- match: { hits.hits.0._id: "5" }
112+
- match: { hits.hits.1._id: "7" }
105113

106114
- do:
107115
search:
@@ -215,7 +223,7 @@ setup:
215223
wildcard:
216224
my_field:
217225
value: "*"
218-
- match: { hits.total.value: 5 }
226+
- match: { hits.total.value: 6 }
219227
---
220228
"regexp match-all works":
221229
- do:
@@ -226,7 +234,7 @@ setup:
226234
regexp:
227235
my_field:
228236
value: ".*"
229-
- match: { hits.total.value: 5 }
237+
- match: { hits.total.value: 6 }
230238
---
231239
"terms query on wildcard field matches":
232240
- do:
@@ -237,3 +245,28 @@ setup:
237245
terms: { my_field: ["AbCd"] }
238246
- match: { hits.total.value: 1 }
239247
- match: { hits.hits.0._id: "5" }
248+
---
249+
"case insensitive query on wildcard field":
250+
- do:
251+
search:
252+
index: test
253+
body:
254+
query:
255+
wildcard:
256+
my_field:
257+
value: "AbCd"
258+
- match: { hits.total.value: 1 }
259+
- match: { hits.hits.0._id: "5" }
260+
261+
- do:
262+
search:
263+
index: test
264+
body:
265+
query:
266+
wildcard:
267+
my_field:
268+
value: "AbCd"
269+
case_insensitive: true
270+
- match: { hits.total.value: 2 }
271+
- match: { hits.hits.0._id: "5" }
272+
- match: { hits.hits.1._id: "7" }

server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java

+11-4
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import org.apache.lucene.util.automaton.RegExp;
4141
import org.opensearch.common.lucene.BytesRefs;
4242
import org.opensearch.common.lucene.Lucene;
43+
import org.opensearch.common.lucene.search.AutomatonQueries;
4344
import org.opensearch.common.unit.Fuzziness;
4445
import org.opensearch.core.xcontent.XContentParser;
4546
import org.opensearch.index.analysis.IndexAnalyzers;
@@ -464,7 +465,7 @@ public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, bo
464465
return existsQuery(context);
465466
}
466467
} else {
467-
approximation = matchAllTermsQuery(name(), requiredNGrams);
468+
approximation = matchAllTermsQuery(name(), requiredNGrams, caseInsensitive);
468469
}
469470
return new WildcardMatchingQuery(name(), approximation, matchPredicate, value, context, this);
470471
}
@@ -678,7 +679,7 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
678679
StringBuilder pattern = new StringBuilder();
679680
for (Object value : values) {
680681
String stringVal = BytesRefs.toString(value);
681-
builder.add(matchAllTermsQuery(name(), getRequiredNGrams(stringVal)), BooleanClause.Occur.SHOULD);
682+
builder.add(matchAllTermsQuery(name(), getRequiredNGrams(stringVal), false), BooleanClause.Occur.SHOULD);
682683
expectedValues.add(stringVal);
683684
if (pattern.length() > 0) {
684685
pattern.append('|');
@@ -688,10 +689,16 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
688689
return new WildcardMatchingQuery(name(), builder.build(), expectedValues::contains, pattern.toString(), context, this);
689690
}
690691

691-
private static BooleanQuery matchAllTermsQuery(String fieldName, Set<String> terms) {
692+
private static BooleanQuery matchAllTermsQuery(String fieldName, Set<String> terms, boolean caseInsensitive) {
692693
BooleanQuery.Builder matchAllTermsBuilder = new BooleanQuery.Builder();
694+
Query query;
693695
for (String term : terms) {
694-
matchAllTermsBuilder.add(new TermQuery(new Term(fieldName, term)), BooleanClause.Occur.FILTER);
696+
if (caseInsensitive) {
697+
query = AutomatonQueries.caseInsensitiveTermQuery(new Term(fieldName, term));
698+
} else {
699+
query = new TermQuery(new Term(fieldName, term));
700+
}
701+
matchAllTermsBuilder.add(query, BooleanClause.Occur.FILTER);
695702
}
696703
return matchAllTermsBuilder.build();
697704
}

0 commit comments

Comments
 (0)