Skip to content

Commit 13f04d9

Browse files
authored
Refactor Grok validate pattern to iterative approach (#14206)
* grok validate patterns recusrion to iterative Signed-off-by: Sandesh Kumar <sandeshkr419@gmail.com> * Add max depth in resolving a pattern to avoid OOM Signed-off-by: Sandesh Kumar <sandeshkr419@gmail.com> * change path from deque to arraylist Signed-off-by: Sandesh Kumar <sandeshkr419@gmail.com> * rename queue to stack Signed-off-by: Sandesh Kumar <sandeshkr419@gmail.com> * Change max depth to 500 Signed-off-by: Sandesh Kumar <sandeshkr419@gmail.com> * typo originPatternName fix Signed-off-by: Sandesh Kumar <sandeshkr419@gmail.com> * spotless Signed-off-by: Sandesh Kumar <sandeshkr419@gmail.com> --------- Signed-off-by: Sandesh Kumar <sandeshkr419@gmail.com>
1 parent bf56227 commit 13f04d9

File tree

3 files changed

+99
-34
lines changed

3 files changed

+99
-34
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6161
- Fix file cache initialization ([#14004](https://github.com/opensearch-project/OpenSearch/pull/14004))
6262
- Handle NPE in GetResult if "found" field is missing ([#14552](https://github.com/opensearch-project/OpenSearch/pull/14552))
6363
- Refactoring FilterPath.parse by using an iterative approach ([#14200](https://github.com/opensearch-project/OpenSearch/pull/14200))
64+
- Refactoring Grok.validatePatternBank by using an iterative approach ([#14206](https://github.com/opensearch-project/OpenSearch/pull/14206))
6465

6566
### Security
6667

libs/grok/src/main/java/org/opensearch/grok/Grok.java

+88-34
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,18 @@
3737
import java.io.InputStream;
3838
import java.io.InputStreamReader;
3939
import java.nio.charset.StandardCharsets;
40+
import java.util.ArrayDeque;
4041
import java.util.ArrayList;
4142
import java.util.Collections;
43+
import java.util.Deque;
44+
import java.util.HashMap;
45+
import java.util.HashSet;
4246
import java.util.Iterator;
4347
import java.util.LinkedHashMap;
4448
import java.util.List;
4549
import java.util.Locale;
4650
import java.util.Map;
47-
import java.util.Stack;
51+
import java.util.Set;
4852
import java.util.function.Consumer;
4953

5054
import org.jcodings.specific.UTF8Encoding;
@@ -86,6 +90,7 @@ public final class Grok {
8690
UTF8Encoding.INSTANCE,
8791
Syntax.DEFAULT
8892
);
93+
private static final int MAX_PATTERN_DEPTH_SIZE = 500;
8994

9095
private static final int MAX_TO_REGEX_ITERATIONS = 100_000; // sanity limit
9196

@@ -128,7 +133,7 @@ private Grok(
128133
expressionBytes.length,
129134
Option.DEFAULT,
130135
UTF8Encoding.INSTANCE,
131-
message -> logCallBack.accept(message)
136+
logCallBack::accept
132137
);
133138

134139
List<GrokCaptureConfig> captureConfig = new ArrayList<>();
@@ -144,7 +149,7 @@ private Grok(
144149
*/
145150
private void validatePatternBank() {
146151
for (String patternName : patternBank.keySet()) {
147-
validatePatternBank(patternName, new Stack<>());
152+
validatePatternBank(patternName);
148153
}
149154
}
150155

@@ -156,33 +161,84 @@ private void validatePatternBank() {
156161
* a reference to another named pattern. This method will navigate to all these named patterns and
157162
* check for a circular reference.
158163
*/
159-
private void validatePatternBank(String patternName, Stack<String> path) {
160-
String pattern = patternBank.get(patternName);
161-
boolean isSelfReference = pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":");
162-
if (isSelfReference) {
163-
throwExceptionForCircularReference(patternName, pattern);
164-
} else if (path.contains(patternName)) {
165-
// current pattern name is already in the path, fetch its predecessor
166-
String prevPatternName = path.pop();
167-
String prevPattern = patternBank.get(prevPatternName);
168-
throwExceptionForCircularReference(prevPatternName, prevPattern, patternName, path);
169-
}
170-
path.push(patternName);
171-
for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) {
172-
int begin = i + 2;
173-
int syntaxEndIndex = pattern.indexOf('}', begin);
174-
if (syntaxEndIndex == -1) {
175-
throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]");
164+
private void validatePatternBank(String initialPatternName) {
165+
Deque<Frame> stack = new ArrayDeque<>();
166+
Set<String> visitedPatterns = new HashSet<>();
167+
Map<String, List<String>> pathMap = new HashMap<>();
168+
169+
List<String> initialPath = new ArrayList<>();
170+
initialPath.add(initialPatternName);
171+
pathMap.put(initialPatternName, initialPath);
172+
stack.push(new Frame(initialPatternName, initialPath, 0));
173+
174+
while (!stack.isEmpty()) {
175+
Frame frame = stack.peek();
176+
String patternName = frame.patternName;
177+
List<String> path = frame.path;
178+
int startIndex = frame.startIndex;
179+
String pattern = patternBank.get(patternName);
180+
181+
if (visitedPatterns.contains(patternName)) {
182+
stack.pop();
183+
continue;
184+
}
185+
186+
visitedPatterns.add(patternName);
187+
boolean foundDependency = false;
188+
189+
for (int i = startIndex; i < pattern.length(); i++) {
190+
if (pattern.startsWith("%{", i)) {
191+
int begin = i + 2;
192+
int syntaxEndIndex = pattern.indexOf('}', begin);
193+
if (syntaxEndIndex == -1) {
194+
throw new IllegalArgumentException("Malformed pattern [" + patternName + "][" + pattern + "]");
195+
}
196+
197+
int semanticNameIndex = pattern.indexOf(':', begin);
198+
int end = semanticNameIndex == -1 ? syntaxEndIndex : Math.min(syntaxEndIndex, semanticNameIndex);
199+
200+
String dependsOnPattern = pattern.substring(begin, end);
201+
202+
if (dependsOnPattern.equals(patternName)) {
203+
throwExceptionForCircularReference(patternName, pattern);
204+
}
205+
206+
if (pathMap.containsKey(dependsOnPattern)) {
207+
throwExceptionForCircularReference(patternName, pattern, dependsOnPattern, path.subList(0, path.size() - 1));
208+
}
209+
210+
List<String> newPath = new ArrayList<>(path);
211+
newPath.add(dependsOnPattern);
212+
pathMap.put(dependsOnPattern, newPath);
213+
214+
stack.push(new Frame(dependsOnPattern, newPath, 0));
215+
frame.startIndex = i + 1;
216+
foundDependency = true;
217+
break;
218+
}
176219
}
177-
int semanticNameIndex = pattern.indexOf(':', begin);
178-
int end = syntaxEndIndex;
179-
if (semanticNameIndex != -1) {
180-
end = Math.min(syntaxEndIndex, semanticNameIndex);
220+
221+
if (!foundDependency) {
222+
pathMap.remove(patternName);
223+
stack.pop();
224+
}
225+
226+
if (stack.size() > MAX_PATTERN_DEPTH_SIZE) {
227+
throw new IllegalArgumentException("Pattern references exceeded maximum depth of " + MAX_PATTERN_DEPTH_SIZE);
181228
}
182-
String dependsOnPattern = pattern.substring(begin, end);
183-
validatePatternBank(dependsOnPattern, path);
184229
}
185-
path.pop();
230+
}
231+
232+
private static class Frame {
233+
String patternName;
234+
List<String> path;
235+
int startIndex;
236+
237+
Frame(String patternName, List<String> path, int startIndex) {
238+
this.patternName = patternName;
239+
this.path = path;
240+
this.startIndex = startIndex;
241+
}
186242
}
187243

188244
private static void throwExceptionForCircularReference(String patternName, String pattern) {
@@ -192,13 +248,13 @@ private static void throwExceptionForCircularReference(String patternName, Strin
192248
private static void throwExceptionForCircularReference(
193249
String patternName,
194250
String pattern,
195-
String originPatterName,
196-
Stack<String> path
251+
String originPatternName,
252+
List<String> path
197253
) {
198254
StringBuilder message = new StringBuilder("circular reference in pattern [");
199255
message.append(patternName).append("][").append(pattern).append("]");
200-
if (originPatterName != null) {
201-
message.append(" back to pattern [").append(originPatterName).append("]");
256+
if (originPatternName != null) {
257+
message.append(" back to pattern [").append(originPatternName).append("]");
202258
}
203259
if (path != null && path.size() > 1) {
204260
message.append(" via patterns [").append(String.join("=>", path)).append("]");
@@ -217,9 +273,7 @@ private String groupMatch(String name, Region region, String pattern) {
217273
int begin = region.getBeg(number);
218274
int end = region.getEnd(number);
219275
return new String(pattern.getBytes(StandardCharsets.UTF_8), begin, end - begin, StandardCharsets.UTF_8);
220-
} catch (StringIndexOutOfBoundsException e) {
221-
return null;
222-
} catch (ValueException e) {
276+
} catch (StringIndexOutOfBoundsException | ValueException e) {
223277
return null;
224278
}
225279
}

libs/grok/src/test/java/org/opensearch/grok/GrokTests.java

+10
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,16 @@ public void testCircularReference() {
377377
"circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] " + "via patterns [NAME1=>NAME2=>NAME3=>NAME4]",
378378
e.getMessage()
379379
);
380+
381+
e = expectThrows(IllegalArgumentException.class, () -> {
382+
Map<String, String> bank = new TreeMap<>();
383+
for (int i = 1; i <= 501; i++) {
384+
bank.put("NAME" + i, "!!!%{NAME" + (i + 1) + "}!!!");
385+
}
386+
String pattern = "%{NAME1}";
387+
new Grok(bank, pattern, false, logger::warn);
388+
});
389+
assertEquals("Pattern references exceeded maximum depth of 500", e.getMessage());
380390
}
381391

382392
public void testMalformedPattern() {

0 commit comments

Comments
 (0)