Decouple IndexSettings from IncludeExclude (opensearch-project#2860) (opensearch-project#2861)

opensearch-trigger-bot[bot] · kartg · web-flow · commit 3af4300c3f62 · 2022-04-13T14:13:18.000-04:00
This change refactors an earlier change to impose a reg-ex size limit on the include/exclude string. Instead of accepting an IndexSettings instance, the class now accepts a integer limit value. This is necessary because the IncludeExclude class is used outside the core codebase, whose use-cases may be unaware of indices and their settings. To ensure that a limit is always imposed, a default limit is defined in the class. (cherry picked from commit ba19668) Signed-off-by: Kartik Ganesh <gkart@amazon.com> Co-authored-by: Kartik Ganesh <gkart@amazon.com>
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/IncludeExclude.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/IncludeExclude.java
@@ -79,6 +79,14 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
     // can disagree on which terms hash to the required partition.
     private static final int HASH_PARTITIONING_SEED = 31;
 
+    /**
+     * The default length limit for a reg-ex string. The value is derived from {@link IndexSettings#MAX_REGEX_LENGTH_SETTING}.
+     * For context, see:
+     * https://github.com/opensearch-project/OpenSearch/issues/1992
+     * https://github.com/opensearch-project/OpenSearch/issues/2858
+     */
+    private static final int DEFAULT_MAX_REGEX_LENGTH = 1000;
+
     // for parsing purposes only
     // TODO: move all aggs to the same package so that this stuff could be pkg-private
     public static IncludeExclude merge(IncludeExclude include, IncludeExclude exclude) {
@@ -576,18 +584,18 @@ public boolean isPartitionBased() {
         return incNumPartitions > 0;
     }
 
-    private Automaton toAutomaton(IndexSettings indexSettings) {
+    private Automaton toAutomaton(int maxRegExLength) {
         Automaton a;
         if (include != null) {
-            validateRegExpStringLength(include, indexSettings);
+            validateRegExpStringLength(include, maxRegExLength);
             a = new RegExp(include).toAutomaton();
         } else if (includeValues != null) {
             a = Automata.makeStringUnion(includeValues);
         } else {
             a = Automata.makeAnyString();
         }
         if (exclude != null) {
-            validateRegExpStringLength(exclude, indexSettings);
+            validateRegExpStringLength(exclude, maxRegExLength);
             Automaton excludeAutomaton = new RegExp(exclude).toAutomaton();
             a = Operations.minus(a, excludeAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
         } else if (excludeValues != null) {
@@ -596,8 +604,7 @@ private Automaton toAutomaton(IndexSettings indexSettings) {
         return a;
     }
 
-    private static void validateRegExpStringLength(String source, IndexSettings indexSettings) {
-        int maxRegexLength = indexSettings.getMaxRegexLength();
+    private static void validateRegExpStringLength(String source, int maxRegexLength) {
         if (maxRegexLength > 0 && source.length() > maxRegexLength) {
             throw new IllegalArgumentException(
                 "The length of regex ["
@@ -613,9 +620,17 @@ private static void validateRegExpStringLength(String source, IndexSettings inde
         }
     }
 
-    public StringFilter convertToStringFilter(DocValueFormat format, IndexSettings indexSettings) {
+    /**
+     * Wrapper method that imposes a default regex limit.
+     * See https://github.com/opensearch-project/OpenSearch/issues/2858
+     */
+    public StringFilter convertToStringFilter(DocValueFormat format) {
+        return convertToStringFilter(format, DEFAULT_MAX_REGEX_LENGTH);
+    }
+
+    public StringFilter convertToStringFilter(DocValueFormat format, int maxRegexLength) {
         if (isRegexBased()) {
-            return new AutomatonBackedStringFilter(toAutomaton(indexSettings));
+            return new AutomatonBackedStringFilter(toAutomaton(maxRegexLength));
         }
         if (isPartitionBased()) {
             return new PartitionedStringFilter();
@@ -636,10 +651,18 @@ private static SortedSet<BytesRef> parseForDocValues(SortedSet<BytesRef> endUser
         return result;
     }
 
-    public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, IndexSettings indexSettings) {
+    /**
+     * Wrapper method that imposes a default regex limit.
+     * See https://github.com/opensearch-project/OpenSearch/issues/2858
+     */
+    public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) {
+        return convertToOrdinalsFilter(format, DEFAULT_MAX_REGEX_LENGTH);
+    }
+
+    public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, int maxRegexLength) {
 
         if (isRegexBased()) {
-            return new AutomatonBackedOrdinalsFilter(toAutomaton(indexSettings));
+            return new AutomatonBackedOrdinalsFilter(toAutomaton(maxRegexLength));
         }
         if (isPartitionBased()) {
             return new PartitionedOrdinalsFilter();
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java
@@ -34,7 +34,6 @@
 
 import org.opensearch.common.ParseField;
 import org.opensearch.common.logging.DeprecationLogger;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.query.QueryShardContext;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.Aggregator;
@@ -251,10 +250,10 @@ Aggregator create(
                 double precision,
                 CardinalityUpperBound cardinality
             ) throws IOException {
-                IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
+                int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
                 final IncludeExclude.StringFilter filter = includeExclude == null
                     ? null
-                    : includeExclude.convertToStringFilter(format, indexSettings);
+                    : includeExclude.convertToStringFilter(format, maxRegexLength);
                 return new StringRareTermsAggregator(
                     name,
                     factories,
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/SignificantTermsAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/SignificantTermsAggregatorFactory.java
@@ -34,7 +34,6 @@
 
 import org.opensearch.common.ParseField;
 import org.opensearch.common.logging.DeprecationLogger;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.query.QueryBuilder;
 import org.opensearch.index.query.QueryShardContext;
 import org.opensearch.search.DocValueFormat;
@@ -326,10 +325,10 @@ Aggregator create(
                 CardinalityUpperBound cardinality,
                 Map<String, Object> metadata
             ) throws IOException {
-                IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
+                int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
                 final IncludeExclude.StringFilter filter = includeExclude == null
                     ? null
-                    : includeExclude.convertToStringFilter(format, indexSettings);
+                    : includeExclude.convertToStringFilter(format, maxRegexLength);
                 return new MapStringTermsAggregator(
                     name,
                     factories,
@@ -367,10 +366,10 @@ Aggregator create(
                 CardinalityUpperBound cardinality,
                 Map<String, Object> metadata
             ) throws IOException {
-                IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
+                int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
                 final IncludeExclude.OrdinalsFilter filter = includeExclude == null
                     ? null
-                    : includeExclude.convertToOrdinalsFilter(format, indexSettings);
+                    : includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
                 boolean remapGlobalOrd = true;
                 if (cardinality == CardinalityUpperBound.ONE && factories == AggregatorFactories.EMPTY && includeExclude == null) {
                     /*
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorFactory.java
@@ -44,7 +44,6 @@
 import org.opensearch.common.util.BigArrays;
 import org.opensearch.common.util.BytesRefHash;
 import org.opensearch.common.util.ObjectArray;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.mapper.MappedFieldType;
 import org.opensearch.index.query.QueryBuilder;
 import org.opensearch.index.query.QueryShardContext;
@@ -138,10 +137,10 @@ protected Aggregator createInternal(
 
         // TODO - need to check with mapping that this is indeed a text field....
 
-        IndexSettings indexSettings = searchContext.getQueryShardContext().getIndexSettings();
+        int maxRegexLength = searchContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
         IncludeExclude.StringFilter incExcFilter = includeExclude == null
             ? null
-            : includeExclude.convertToStringFilter(DocValueFormat.RAW, indexSettings);
+            : includeExclude.convertToStringFilter(DocValueFormat.RAW, maxRegexLength);
 
         MapStringTermsAggregator.CollectorSource collectorSource = new SignificantTextCollectorSource(
             queryShardContext.lookup().source(),
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
@@ -34,7 +34,6 @@
 
 import org.apache.lucene.search.IndexSearcher;
 import org.opensearch.common.ParseField;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.query.QueryShardContext;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.AggregationExecutionException;
@@ -381,10 +380,10 @@ Aggregator create(
                 CardinalityUpperBound cardinality,
                 Map<String, Object> metadata
             ) throws IOException {
-                IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
+                int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
                 final IncludeExclude.StringFilter filter = includeExclude == null
                     ? null
-                    : includeExclude.convertToStringFilter(format, indexSettings);
+                    : includeExclude.convertToStringFilter(format, maxRegexLength);
                 return new MapStringTermsAggregator(
                     name,
                     factories,
@@ -462,10 +461,10 @@ Aggregator create(
                     );
 
                 }
-                IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
+                int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
                 final IncludeExclude.OrdinalsFilter filter = includeExclude == null
                     ? null
-                    : includeExclude.convertToOrdinalsFilter(format, indexSettings);
+                    : includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
                 boolean remapGlobalOrds;
                 if (cardinality == CardinalityUpperBound.ONE && REMAP_GLOBAL_ORDS != null) {
                     /*
diff --git a/server/src/test/java/org/opensearch/search/aggregations/support/IncludeExcludeTests.java b/server/src/test/java/org/opensearch/search/aggregations/support/IncludeExcludeTests.java
@@ -36,16 +36,12 @@
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LongBitSet;
-import org.opensearch.Version;
-import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.ParseField;
-import org.opensearch.common.settings.Settings;
 import org.opensearch.common.xcontent.ToXContent;
 import org.opensearch.common.xcontent.XContentBuilder;
 import org.opensearch.common.xcontent.XContentFactory;
 import org.opensearch.common.xcontent.XContentParser;
 import org.opensearch.common.xcontent.XContentType;
-import org.opensearch.index.IndexSettings;
 import org.opensearch.index.fielddata.AbstractSortedSetDocValues;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
@@ -58,23 +54,14 @@
 
 public class IncludeExcludeTests extends OpenSearchTestCase {
 
-    private final IndexSettings dummyIndexSettings = new IndexSettings(
-        IndexMetadata.builder("index")
-            .settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))
-            .numberOfShards(1)
-            .numberOfReplicas(0)
-            .build(),
-        Settings.EMPTY
-    );
-
     public void testEmptyTermsWithOrds() throws IOException {
         IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
-        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
         assertEquals(0, acceptedOrds.length());
 
         inexcl = new IncludeExclude(null, new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
         assertEquals(0, acceptedOrds.length());
     }
@@ -113,13 +100,13 @@ public long getValueCount() {
 
         };
         IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
-        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertTrue(acceptedOrds.get(0));
 
         inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("bar"))), null);
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertFalse(acceptedOrds.get(0));
@@ -128,7 +115,7 @@ public long getValueCount() {
             new TreeSet<>(Collections.singleton(new BytesRef("foo"))),
             new TreeSet<>(Collections.singleton(new BytesRef("foo")))
         );
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertFalse(acceptedOrds.get(0));
@@ -137,7 +124,7 @@ public long getValueCount() {
             null, // means everything included
             new TreeSet<>(Collections.singleton(new BytesRef("foo")))
         );
-        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
+        filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
         acceptedOrds = filter.acceptedGlobalOrdinals(ords);
         assertEquals(1, acceptedOrds.length());
         assertFalse(acceptedOrds.get(0));