Skip to content

Commit 3af4300

Browse files
Decouple IndexSettings from IncludeExclude (opensearch-project#2860) (opensearch-project#2861)
This change refactors an earlier change to impose a reg-ex size limit on the include/exclude string. Instead of accepting an IndexSettings instance, the class now accepts a integer limit value. This is necessary because the IncludeExclude class is used outside the core codebase, whose use-cases may be unaware of indices and their settings. To ensure that a limit is always imposed, a default limit is defined in the class. (cherry picked from commit ba19668) Signed-off-by: Kartik Ganesh <gkart@amazon.com> Co-authored-by: Kartik Ganesh <gkart@amazon.com>
1 parent 135177e commit 3af4300

File tree

6 files changed

+50
-44
lines changed

6 files changed

+50
-44
lines changed

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/IncludeExclude.java

+32-9
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,14 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
7979
// can disagree on which terms hash to the required partition.
8080
private static final int HASH_PARTITIONING_SEED = 31;
8181

82+
/**
83+
* The default length limit for a reg-ex string. The value is derived from {@link IndexSettings#MAX_REGEX_LENGTH_SETTING}.
84+
* For context, see:
85+
* https://github.com/opensearch-project/OpenSearch/issues/1992
86+
* https://github.com/opensearch-project/OpenSearch/issues/2858
87+
*/
88+
private static final int DEFAULT_MAX_REGEX_LENGTH = 1000;
89+
8290
// for parsing purposes only
8391
// TODO: move all aggs to the same package so that this stuff could be pkg-private
8492
public static IncludeExclude merge(IncludeExclude include, IncludeExclude exclude) {
@@ -576,18 +584,18 @@ public boolean isPartitionBased() {
576584
return incNumPartitions > 0;
577585
}
578586

579-
private Automaton toAutomaton(IndexSettings indexSettings) {
587+
private Automaton toAutomaton(int maxRegExLength) {
580588
Automaton a;
581589
if (include != null) {
582-
validateRegExpStringLength(include, indexSettings);
590+
validateRegExpStringLength(include, maxRegExLength);
583591
a = new RegExp(include).toAutomaton();
584592
} else if (includeValues != null) {
585593
a = Automata.makeStringUnion(includeValues);
586594
} else {
587595
a = Automata.makeAnyString();
588596
}
589597
if (exclude != null) {
590-
validateRegExpStringLength(exclude, indexSettings);
598+
validateRegExpStringLength(exclude, maxRegExLength);
591599
Automaton excludeAutomaton = new RegExp(exclude).toAutomaton();
592600
a = Operations.minus(a, excludeAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
593601
} else if (excludeValues != null) {
@@ -596,8 +604,7 @@ private Automaton toAutomaton(IndexSettings indexSettings) {
596604
return a;
597605
}
598606

599-
private static void validateRegExpStringLength(String source, IndexSettings indexSettings) {
600-
int maxRegexLength = indexSettings.getMaxRegexLength();
607+
private static void validateRegExpStringLength(String source, int maxRegexLength) {
601608
if (maxRegexLength > 0 && source.length() > maxRegexLength) {
602609
throw new IllegalArgumentException(
603610
"The length of regex ["
@@ -613,9 +620,17 @@ private static void validateRegExpStringLength(String source, IndexSettings inde
613620
}
614621
}
615622

616-
public StringFilter convertToStringFilter(DocValueFormat format, IndexSettings indexSettings) {
623+
/**
624+
* Wrapper method that imposes a default regex limit.
625+
* See https://github.com/opensearch-project/OpenSearch/issues/2858
626+
*/
627+
public StringFilter convertToStringFilter(DocValueFormat format) {
628+
return convertToStringFilter(format, DEFAULT_MAX_REGEX_LENGTH);
629+
}
630+
631+
public StringFilter convertToStringFilter(DocValueFormat format, int maxRegexLength) {
617632
if (isRegexBased()) {
618-
return new AutomatonBackedStringFilter(toAutomaton(indexSettings));
633+
return new AutomatonBackedStringFilter(toAutomaton(maxRegexLength));
619634
}
620635
if (isPartitionBased()) {
621636
return new PartitionedStringFilter();
@@ -636,10 +651,18 @@ private static SortedSet<BytesRef> parseForDocValues(SortedSet<BytesRef> endUser
636651
return result;
637652
}
638653

639-
public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, IndexSettings indexSettings) {
654+
/**
655+
* Wrapper method that imposes a default regex limit.
656+
* See https://github.com/opensearch-project/OpenSearch/issues/2858
657+
*/
658+
public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) {
659+
return convertToOrdinalsFilter(format, DEFAULT_MAX_REGEX_LENGTH);
660+
}
661+
662+
public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, int maxRegexLength) {
640663

641664
if (isRegexBased()) {
642-
return new AutomatonBackedOrdinalsFilter(toAutomaton(indexSettings));
665+
return new AutomatonBackedOrdinalsFilter(toAutomaton(maxRegexLength));
643666
}
644667
if (isPartitionBased()) {
645668
return new PartitionedOrdinalsFilter();

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434

3535
import org.opensearch.common.ParseField;
3636
import org.opensearch.common.logging.DeprecationLogger;
37-
import org.opensearch.index.IndexSettings;
3837
import org.opensearch.index.query.QueryShardContext;
3938
import org.opensearch.search.DocValueFormat;
4039
import org.opensearch.search.aggregations.Aggregator;
@@ -251,10 +250,10 @@ Aggregator create(
251250
double precision,
252251
CardinalityUpperBound cardinality
253252
) throws IOException {
254-
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
253+
int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
255254
final IncludeExclude.StringFilter filter = includeExclude == null
256255
? null
257-
: includeExclude.convertToStringFilter(format, indexSettings);
256+
: includeExclude.convertToStringFilter(format, maxRegexLength);
258257
return new StringRareTermsAggregator(
259258
name,
260259
factories,

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/SignificantTermsAggregatorFactory.java

+4-5
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434

3535
import org.opensearch.common.ParseField;
3636
import org.opensearch.common.logging.DeprecationLogger;
37-
import org.opensearch.index.IndexSettings;
3837
import org.opensearch.index.query.QueryBuilder;
3938
import org.opensearch.index.query.QueryShardContext;
4039
import org.opensearch.search.DocValueFormat;
@@ -326,10 +325,10 @@ Aggregator create(
326325
CardinalityUpperBound cardinality,
327326
Map<String, Object> metadata
328327
) throws IOException {
329-
IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
328+
int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
330329
final IncludeExclude.StringFilter filter = includeExclude == null
331330
? null
332-
: includeExclude.convertToStringFilter(format, indexSettings);
331+
: includeExclude.convertToStringFilter(format, maxRegexLength);
333332
return new MapStringTermsAggregator(
334333
name,
335334
factories,
@@ -367,10 +366,10 @@ Aggregator create(
367366
CardinalityUpperBound cardinality,
368367
Map<String, Object> metadata
369368
) throws IOException {
370-
IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
369+
int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
371370
final IncludeExclude.OrdinalsFilter filter = includeExclude == null
372371
? null
373-
: includeExclude.convertToOrdinalsFilter(format, indexSettings);
372+
: includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
374373
boolean remapGlobalOrd = true;
375374
if (cardinality == CardinalityUpperBound.ONE && factories == AggregatorFactories.EMPTY && includeExclude == null) {
376375
/*

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
import org.opensearch.common.util.BigArrays;
4545
import org.opensearch.common.util.BytesRefHash;
4646
import org.opensearch.common.util.ObjectArray;
47-
import org.opensearch.index.IndexSettings;
4847
import org.opensearch.index.mapper.MappedFieldType;
4948
import org.opensearch.index.query.QueryBuilder;
5049
import org.opensearch.index.query.QueryShardContext;
@@ -138,10 +137,10 @@ protected Aggregator createInternal(
138137

139138
// TODO - need to check with mapping that this is indeed a text field....
140139

141-
IndexSettings indexSettings = searchContext.getQueryShardContext().getIndexSettings();
140+
int maxRegexLength = searchContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
142141
IncludeExclude.StringFilter incExcFilter = includeExclude == null
143142
? null
144-
: includeExclude.convertToStringFilter(DocValueFormat.RAW, indexSettings);
143+
: includeExclude.convertToStringFilter(DocValueFormat.RAW, maxRegexLength);
145144

146145
MapStringTermsAggregator.CollectorSource collectorSource = new SignificantTextCollectorSource(
147146
queryShardContext.lookup().source(),

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java

+4-5
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434

3535
import org.apache.lucene.search.IndexSearcher;
3636
import org.opensearch.common.ParseField;
37-
import org.opensearch.index.IndexSettings;
3837
import org.opensearch.index.query.QueryShardContext;
3938
import org.opensearch.search.DocValueFormat;
4039
import org.opensearch.search.aggregations.AggregationExecutionException;
@@ -381,10 +380,10 @@ Aggregator create(
381380
CardinalityUpperBound cardinality,
382381
Map<String, Object> metadata
383382
) throws IOException {
384-
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
383+
int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
385384
final IncludeExclude.StringFilter filter = includeExclude == null
386385
? null
387-
: includeExclude.convertToStringFilter(format, indexSettings);
386+
: includeExclude.convertToStringFilter(format, maxRegexLength);
388387
return new MapStringTermsAggregator(
389388
name,
390389
factories,
@@ -462,10 +461,10 @@ Aggregator create(
462461
);
463462

464463
}
465-
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
464+
int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
466465
final IncludeExclude.OrdinalsFilter filter = includeExclude == null
467466
? null
468-
: includeExclude.convertToOrdinalsFilter(format, indexSettings);
467+
: includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
469468
boolean remapGlobalOrds;
470469
if (cardinality == CardinalityUpperBound.ONE && REMAP_GLOBAL_ORDS != null) {
471470
/*

server/src/test/java/org/opensearch/search/aggregations/support/IncludeExcludeTests.java

+6-19
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,12 @@
3636
import org.apache.lucene.index.SortedSetDocValues;
3737
import org.apache.lucene.util.BytesRef;
3838
import org.apache.lucene.util.LongBitSet;
39-
import org.opensearch.Version;
40-
import org.opensearch.cluster.metadata.IndexMetadata;
4139
import org.opensearch.common.ParseField;
42-
import org.opensearch.common.settings.Settings;
4340
import org.opensearch.common.xcontent.ToXContent;
4441
import org.opensearch.common.xcontent.XContentBuilder;
4542
import org.opensearch.common.xcontent.XContentFactory;
4643
import org.opensearch.common.xcontent.XContentParser;
4744
import org.opensearch.common.xcontent.XContentType;
48-
import org.opensearch.index.IndexSettings;
4945
import org.opensearch.index.fielddata.AbstractSortedSetDocValues;
5046
import org.opensearch.search.DocValueFormat;
5147
import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
@@ -58,23 +54,14 @@
5854

5955
public class IncludeExcludeTests extends OpenSearchTestCase {
6056

61-
private final IndexSettings dummyIndexSettings = new IndexSettings(
62-
IndexMetadata.builder("index")
63-
.settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))
64-
.numberOfShards(1)
65-
.numberOfReplicas(0)
66-
.build(),
67-
Settings.EMPTY
68-
);
69-
7057
public void testEmptyTermsWithOrds() throws IOException {
7158
IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
72-
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
59+
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
7360
LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
7461
assertEquals(0, acceptedOrds.length());
7562

7663
inexcl = new IncludeExclude(null, new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
77-
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
64+
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
7865
acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
7966
assertEquals(0, acceptedOrds.length());
8067
}
@@ -113,13 +100,13 @@ public long getValueCount() {
113100

114101
};
115102
IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
116-
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
103+
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
117104
LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords);
118105
assertEquals(1, acceptedOrds.length());
119106
assertTrue(acceptedOrds.get(0));
120107

121108
inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("bar"))), null);
122-
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
109+
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
123110
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
124111
assertEquals(1, acceptedOrds.length());
125112
assertFalse(acceptedOrds.get(0));
@@ -128,7 +115,7 @@ public long getValueCount() {
128115
new TreeSet<>(Collections.singleton(new BytesRef("foo"))),
129116
new TreeSet<>(Collections.singleton(new BytesRef("foo")))
130117
);
131-
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
118+
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
132119
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
133120
assertEquals(1, acceptedOrds.length());
134121
assertFalse(acceptedOrds.get(0));
@@ -137,7 +124,7 @@ public long getValueCount() {
137124
null, // means everything included
138125
new TreeSet<>(Collections.singleton(new BytesRef("foo")))
139126
);
140-
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
127+
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
141128
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
142129
assertEquals(1, acceptedOrds.length());
143130
assertFalse(acceptedOrds.get(0));

0 commit comments

Comments
 (0)