Skip to content

Commit f2e2a85

Browse files
authored
Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex (#13104)
* Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex
  Signed-off-by: Niyati Aggarwal <niyatiagg4641@gmail.com>
* Adding entry to CHANGELOG.md
  Signed-off-by: Niyati Aggarwal <niyatiagg4641@gmail.com>
* Adding tests for GlobMatch
  Signed-off-by: Niyati Aggarwal <niyatiagg4641@gmail.com>
* Moving entry to Changed section in CHANGELOG.md
  Signed-off-by: Niyati Aggarwal <niyatiagg4641@gmail.com>
---------
Signed-off-by: Niyati Aggarwal <niyatiagg4641@gmail.com>
1 parent c168e1c commit f2e2a85

File tree

4 files changed

+97
-59
lines changed

4 files changed

+97
-59
lines changed

CHANGELOG.md

+1
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3636
- [BWC and API enforcement] Enforcing the presence of API annotations at build time ([#12872](https://github.com/opensearch-project/OpenSearch/pull/12872))
3737
- Improve built-in secure transports support ([#12907](https://github.com/opensearch-project/OpenSearch/pull/12907))
3838
- Update links to documentation in rest-api-spec ([#13043](https://github.com/opensearch-project/OpenSearch/pull/13043))
39+
- Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex ([#13104](https://github.com/opensearch-project/OpenSearch/pull/13104))
3940

4041
### Deprecated
4142

libs/common/src/main/java/org/opensearch/common/Glob.java

+27-26
Original file line number | Diff line number | Diff line change
@@ -52,34 +52,35 @@ public static boolean globMatch(String pattern, String str) {
5252
if (pattern == null || str == null) {
5353
return false;
5454
}
55-
int firstIndex = pattern.indexOf('*');
56-
if (firstIndex == -1) {
57-
return pattern.equals(str);
58-
}
59-
if (firstIndex == 0) {
60-
if (pattern.length() == 1) {
61-
return true;
62-
}
63-
int nextIndex = pattern.indexOf('*', firstIndex + 1);
64-
if (nextIndex == -1) {
65-
return str.endsWith(pattern.substring(1));
66-
} else if (nextIndex == 1) {
67-
// Double wildcard "**" - skipping the first "*"
68-
return globMatch(pattern.substring(1), str);
55+
int sIdx = 0, pIdx = 0, match = 0, wildcardIdx = -1;
56+
while (sIdx < str.length()) {
57+
// both chars matching, incrementing both pointers
58+
if (pIdx < pattern.length() && str.charAt(sIdx) == pattern.charAt(pIdx)) {
59+
sIdx++;
60+
pIdx++;
61+
} else if (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') {
62+
// wildcard found, only incrementing pattern pointer
63+
wildcardIdx = pIdx;
64+
match = sIdx;
65+
pIdx++;
66+
} else if (wildcardIdx != -1) {
67+
// mismatch after an earlier wildcard: backtrack to just past that wildcard and let it absorb one more character of the string
68+
pIdx = wildcardIdx + 1;
69+
match++;
70+
sIdx = match;
71+
} else {
72+
// current pattern pointer is not a wildcard, last pattern pointer was also not a wildcard
73+
// characters do not match
74+
return false;
6975
}
70-
String part = pattern.substring(1, nextIndex);
71-
int partIndex = str.indexOf(part);
72-
while (partIndex != -1) {
73-
if (globMatch(pattern.substring(nextIndex), str.substring(partIndex + part.length()))) {
74-
return true;
75-
}
76-
partIndex = str.indexOf(part, partIndex + 1);
77-
}
78-
return false;
7976
}
80-
return (str.length() >= firstIndex
81-
&& pattern.substring(0, firstIndex).equals(str.substring(0, firstIndex))
82-
&& globMatch(pattern.substring(firstIndex), str.substring(firstIndex)));
77+
78+
// string is exhausted: skip any trailing wildcards left in the pattern; the match succeeds only if nothing else remains
79+
while (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') {
80+
pIdx++;
81+
}
82+
83+
return pIdx == pattern.length();
8384
}
8485

8586
}

server/src/main/java/org/opensearch/common/regex/Regex.java

+2-33
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
import org.apache.lucene.util.automaton.Automata;
3636
import org.apache.lucene.util.automaton.Automaton;
3737
import org.apache.lucene.util.automaton.Operations;
38+
import org.opensearch.common.Glob;
3839
import org.opensearch.core.common.Strings;
3940

4041
import java.util.ArrayList;
@@ -125,39 +126,7 @@ public static boolean simpleMatch(String pattern, String str, boolean caseInsens
125126
pattern = Strings.toLowercaseAscii(pattern);
126127
str = Strings.toLowercaseAscii(str);
127128
}
128-
return simpleMatchWithNormalizedStrings(pattern, str);
129-
}
130-
131-
private static boolean simpleMatchWithNormalizedStrings(String pattern, String str) {
132-
int sIdx = 0, pIdx = 0, match = 0, wildcardIdx = -1;
133-
while (sIdx < str.length()) {
134-
// both chars matching, incrementing both pointers
135-
if (pIdx < pattern.length() && str.charAt(sIdx) == pattern.charAt(pIdx)) {
136-
sIdx++;
137-
pIdx++;
138-
} else if (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') {
139-
// wildcard found, only incrementing pattern pointer
140-
wildcardIdx = pIdx;
141-
match = sIdx;
142-
pIdx++;
143-
} else if (wildcardIdx != -1) {
144-
// last pattern pointer was a wildcard, incrementing string pointer
145-
pIdx = wildcardIdx + 1;
146-
match++;
147-
sIdx = match;
148-
} else {
149-
// current pattern pointer is not a wildcard, last pattern pointer was also not a wildcard
150-
// characters do not match
151-
return false;
152-
}
153-
}
154-
155-
// check for remaining characters in pattern
156-
while (pIdx < pattern.length() && pattern.charAt(pIdx) == '*') {
157-
pIdx++;
158-
}
159-
160-
return pIdx == pattern.length();
129+
return Glob.globMatch(pattern, str);
161130
}
162131

163132
/**
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,67 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.common;
10+
11+
import org.opensearch.test.OpenSearchTestCase;
12+
13+
public class GlobTests extends OpenSearchTestCase {
14+
15+
public void testGlobMatchForNull() {
16+
assertFalse(Glob.globMatch(null, "test"));
17+
assertFalse(Glob.globMatch("test", null));
18+
assertFalse(Glob.globMatch(null, null));
19+
}
20+
21+
public void testGlobMatchNoWildcard() {
22+
assertTrue(Glob.globMatch("abcd", "abcd"));
23+
assertFalse(Glob.globMatch("abcd", "foobar"));
24+
}
25+
26+
public void testGlobMatchSingleWildcard() {
27+
assertTrue(Glob.globMatch("*foo", "barfoo"));
28+
assertFalse(Glob.globMatch("*foo", "foobar"));
29+
assertTrue(Glob.globMatch("foo*", "foobarfoo"));
30+
assertFalse(Glob.globMatch("foo*", "barfoobar"));
31+
assertTrue(Glob.globMatch("foo*bar", "foobarnfoosbar"));
32+
}
33+
34+
public void testGlobMatchMultipleWildcards() {
35+
assertTrue(Glob.globMatch("*foo*", "barfoobar"));
36+
assertFalse(Glob.globMatch("*foo*", "baroofbar"));
37+
assertTrue(Glob.globMatch("*foo*bar", "abcdfooefghbar"));
38+
assertFalse(Glob.globMatch("*foo*bar", "foonotbars"));
39+
}
40+
41+
public void testGlobalMatchDoubleWildcard() {
42+
assertTrue(Glob.globMatch("**foo", "barbarfoo"));
43+
assertFalse(Glob.globMatch("**foo", "barbarfoowoof"));
44+
assertTrue(Glob.globMatch("**bar**", "foobarfoo"));
45+
assertFalse(Glob.globMatch("**bar**", "foobanfoo"));
46+
}
47+
48+
public void testGlobMatchMultipleCharactersWithSingleWildcard() {
49+
assertTrue(Glob.globMatch("a*b", "acb"));
50+
assertTrue(Glob.globMatch("f*oo", "foo"));
51+
assertTrue(Glob.globMatch("a*b", "aab"));
52+
assertTrue(Glob.globMatch("a*b", "aaab"));
53+
}
54+
55+
public void testGlobMatchWildcardWithEmptyString() {
56+
assertTrue(Glob.globMatch("*", ""));
57+
assertTrue(Glob.globMatch("a*", "a"));
58+
assertFalse(Glob.globMatch("a*", ""));
59+
}
60+
61+
public void testGlobMatchMultipleWildcardsWithMultipleCharacters() {
62+
assertTrue(Glob.globMatch("a*b*c", "abc"));
63+
assertTrue(Glob.globMatch("a*b*c", "axxxbxbc"));
64+
assertFalse(Glob.globMatch("a*b*c", "abca"));
65+
assertFalse(Glob.globMatch("a*b*c", "ac"));
66+
}
67+
}

0 commit comments

Comments
 (0)